From 4e92d9c93c5a2e432d9dde2c3e4fdaf4505b4bec Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 17 Jun 2024 15:52:16 -0500 Subject: [PATCH 1/4] started intitial creation of test workflows --- locidex/classes/extractor.py | 6 - tests/test_data/genomes/G1.fasta | 2 + tests/test_data/genomes/G10.fasta | 2 + tests/test_data/genomes/G11.fasta | 2 + tests/test_data/genomes/G12.fasta | 2 + tests/test_data/genomes/G13.fasta | 2 + tests/test_data/genomes/G14.fasta | 2 + tests/test_data/genomes/G2.fasta | 2 + tests/test_data/genomes/G3.fasta | 2 + tests/test_data/genomes/G4.fasta | 2 + tests/test_data/genomes/G5.fasta | 2 + tests/test_data/genomes/G6.fasta | 2 + tests/test_data/genomes/G7.fasta | 2 + tests/test_data/genomes/G8.fasta | 2 + tests/test_data/genomes/G9.fasta | 2 + .../db/blast/nucleotide/nucleotide.fasta | 40 + .../db/blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../db/blast/nucleotide/nucleotide.nhr | Bin 0 -> 1290 bytes .../db/blast/nucleotide/nucleotide.nin | Bin 0 -> 372 bytes .../db/blast/nucleotide/nucleotide.njs | 22 + .../db/blast/nucleotide/nucleotide.not | Bin 0 -> 248 bytes .../db/blast/nucleotide/nucleotide.nsq | Bin 0 -> 5674 bytes .../db/blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../db/blast/nucleotide/nucleotide.nto | Bin 0 -> 84 bytes .../outputs/db/blast/protein/protein.fasta | 40 + .../outputs/db/blast/protein/protein.pdb | Bin 0 -> 20480 bytes .../outputs/db/blast/protein/protein.phr | Bin 0 -> 1290 bytes .../outputs/db/blast/protein/protein.pin | Bin 0 -> 280 bytes .../outputs/db/blast/protein/protein.pjs | 22 + .../outputs/db/blast/protein/protein.pot | Bin 0 -> 248 bytes .../outputs/db/blast/protein/protein.psq | Bin 0 -> 7568 bytes .../outputs/db/blast/protein/protein.ptf | Bin 0 -> 16384 bytes .../outputs/db/blast/protein/protein.pto | Bin 0 -> 84 bytes tests/test_data/outputs/db/config.json | 12 + tests/test_data/outputs/db/meta.json | 455 +++++ tests/test_data/outputs/db/results.json | 15 + 
.../outputs/extract/G1/blast/hsps.txt | 20 + .../outputs/extract/G1/blast_db/contigs.fasta | 2 + .../extract/G1/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G1/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G1/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G1/blast_db/contigs.fasta.njs | 22 + .../extract/G1/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G1/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G1/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G1/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G1/filtered.hsps.txt | 21 + .../extract/G1/processed.extracted.seqs.fasta | 40 + .../extract/G1/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G1/seq_data.txt | 21 + .../outputs/extract/G10/blast/hsps.txt | 20 + .../extract/G10/blast_db/contigs.fasta | 2 + .../extract/G10/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G10/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G10/blast_db/contigs.fasta.nin | Bin 0 -> 152 bytes .../extract/G10/blast_db/contigs.fasta.njs | 22 + .../extract/G10/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G10/blast_db/contigs.fasta.nsq | Bin 0 -> 5750 bytes .../extract/G10/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G10/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G10/filtered.hsps.txt | 20 + .../G10/processed.extracted.seqs.fasta | 38 + .../extract/G10/raw.extracted.seqs.fasta | 38 + .../outputs/extract/G10/seq_data.txt | 20 + .../outputs/extract/G11/blast/hsps.txt | 21 + .../extract/G11/blast_db/contigs.fasta | 2 + .../extract/G11/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G11/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G11/blast_db/contigs.fasta.nin | Bin 0 -> 152 bytes .../extract/G11/blast_db/contigs.fasta.njs | 22 + .../extract/G11/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes 
.../extract/G11/blast_db/contigs.fasta.nsq | Bin 0 -> 6057 bytes .../extract/G11/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G11/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G11/filtered.hsps.txt | 24 + .../G11/processed.extracted.seqs.fasta | 42 + .../extract/G11/raw.extracted.seqs.fasta | 42 + .../outputs/extract/G11/seq_data.txt | 22 + .../outputs/extract/G12/blast/hsps.txt | 21 + .../extract/G12/blast_db/contigs.fasta | 2 + .../extract/G12/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G12/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G12/blast_db/contigs.fasta.nin | Bin 0 -> 152 bytes .../extract/G12/blast_db/contigs.fasta.njs | 22 + .../extract/G12/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G12/blast_db/contigs.fasta.nsq | Bin 0 -> 6057 bytes .../extract/G12/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G12/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G12/filtered.hsps.txt | 24 + .../G12/processed.extracted.seqs.fasta | 42 + .../extract/G12/raw.extracted.seqs.fasta | 42 + .../outputs/extract/G12/seq_data.txt | 22 + .../outputs/extract/G13/blast/hsps.txt | 21 + .../extract/G13/blast_db/contigs.fasta | 2 + .../extract/G13/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G13/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G13/blast_db/contigs.fasta.nin | Bin 0 -> 152 bytes .../extract/G13/blast_db/contigs.fasta.njs | 22 + .../extract/G13/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G13/blast_db/contigs.fasta.nsq | Bin 0 -> 6089 bytes .../extract/G13/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G13/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G13/filtered.hsps.txt | 24 + .../G13/processed.extracted.seqs.fasta | 42 + .../extract/G13/raw.extracted.seqs.fasta | 42 + .../outputs/extract/G13/seq_data.txt | 22 + .../outputs/extract/G14/blast/hsps.txt | 21 + 
.../extract/G14/blast_db/contigs.fasta | 2 + .../extract/G14/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G14/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G14/blast_db/contigs.fasta.nin | Bin 0 -> 152 bytes .../extract/G14/blast_db/contigs.fasta.njs | 22 + .../extract/G14/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G14/blast_db/contigs.fasta.nsq | Bin 0 -> 6089 bytes .../extract/G14/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G14/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G14/filtered.hsps.txt | 24 + .../G14/processed.extracted.seqs.fasta | 42 + .../extract/G14/raw.extracted.seqs.fasta | 42 + .../outputs/extract/G14/seq_data.txt | 22 + .../outputs/extract/G2/blast/hsps.txt | 20 + .../outputs/extract/G2/blast_db/contigs.fasta | 2 + .../extract/G2/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G2/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G2/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G2/blast_db/contigs.fasta.njs | 22 + .../extract/G2/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G2/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G2/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G2/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G2/filtered.hsps.txt | 21 + .../extract/G2/processed.extracted.seqs.fasta | 40 + .../extract/G2/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G2/seq_data.txt | 21 + .../outputs/extract/G3/blast/hsps.txt | 20 + .../outputs/extract/G3/blast_db/contigs.fasta | 2 + .../extract/G3/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G3/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G3/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G3/blast_db/contigs.fasta.njs | 22 + .../extract/G3/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G3/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes 
.../extract/G3/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G3/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G3/filtered.hsps.txt | 21 + .../extract/G3/processed.extracted.seqs.fasta | 40 + .../extract/G3/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G3/seq_data.txt | 21 + .../outputs/extract/G4/blast/hsps.txt | 20 + .../outputs/extract/G4/blast_db/contigs.fasta | 2 + .../extract/G4/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G4/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G4/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G4/blast_db/contigs.fasta.njs | 22 + .../extract/G4/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G4/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G4/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G4/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G4/filtered.hsps.txt | 21 + .../extract/G4/processed.extracted.seqs.fasta | 40 + .../extract/G4/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G4/seq_data.txt | 21 + .../outputs/extract/G5/blast/hsps.txt | 20 + .../outputs/extract/G5/blast_db/contigs.fasta | 2 + .../extract/G5/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G5/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G5/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G5/blast_db/contigs.fasta.njs | 22 + .../extract/G5/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G5/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G5/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G5/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G5/filtered.hsps.txt | 21 + .../extract/G5/processed.extracted.seqs.fasta | 40 + .../extract/G5/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G5/seq_data.txt | 21 + .../outputs/extract/G6/blast/hsps.txt | 20 + .../outputs/extract/G6/blast_db/contigs.fasta | 2 
+ .../extract/G6/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G6/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G6/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G6/blast_db/contigs.fasta.njs | 22 + .../extract/G6/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G6/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G6/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G6/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G6/filtered.hsps.txt | 21 + .../extract/G6/processed.extracted.seqs.fasta | 40 + .../extract/G6/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G6/seq_data.txt | 21 + .../outputs/extract/G7/blast/hsps.txt | 20 + .../outputs/extract/G7/blast_db/contigs.fasta | 2 + .../extract/G7/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G7/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G7/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G7/blast_db/contigs.fasta.njs | 22 + .../extract/G7/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G7/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G7/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G7/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G7/filtered.hsps.txt | 21 + .../extract/G7/processed.extracted.seqs.fasta | 40 + .../extract/G7/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G7/seq_data.txt | 21 + .../outputs/extract/G8/blast/hsps.txt | 20 + .../outputs/extract/G8/blast_db/contigs.fasta | 2 + .../extract/G8/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G8/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G8/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G8/blast_db/contigs.fasta.njs | 22 + .../extract/G8/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G8/blast_db/contigs.fasta.nsq | Bin 0 -> 5808 bytes .../extract/G8/blast_db/contigs.fasta.ntf | Bin 0 -> 
16384 bytes .../extract/G8/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G8/filtered.hsps.txt | 21 + .../extract/G8/processed.extracted.seqs.fasta | 40 + .../extract/G8/raw.extracted.seqs.fasta | 40 + .../test_data/outputs/extract/G8/seq_data.txt | 21 + .../outputs/extract/G9/blast/hsps.txt | 20 + .../outputs/extract/G9/blast_db/contigs.fasta | 2 + .../extract/G9/blast_db/contigs.fasta.ndb | Bin 0 -> 20480 bytes .../extract/G9/blast_db/contigs.fasta.nhr | Bin 0 -> 64 bytes .../extract/G9/blast_db/contigs.fasta.nin | Bin 0 -> 144 bytes .../extract/G9/blast_db/contigs.fasta.njs | 22 + .../extract/G9/blast_db/contigs.fasta.not | Bin 0 -> 20 bytes .../extract/G9/blast_db/contigs.fasta.nsq | Bin 0 -> 5736 bytes .../extract/G9/blast_db/contigs.fasta.ntf | Bin 0 -> 16384 bytes .../extract/G9/blast_db/contigs.fasta.nto | Bin 0 -> 8 bytes .../outputs/extract/G9/filtered.hsps.txt | 20 + .../extract/G9/processed.extracted.seqs.fasta | 38 + .../extract/G9/raw.extracted.seqs.fasta | 38 + .../test_data/outputs/extract/G9/seq_data.txt | 20 + .../blast/nucleotide/nucleotide.fasta | 40 + .../blast/nucleotide/nucleotide.ndb | Bin 0 -> 20480 bytes .../blast/nucleotide/nucleotide.nhr | Bin 0 -> 1290 bytes .../blast/nucleotide/nucleotide.nin | Bin 0 -> 372 bytes .../blast/nucleotide/nucleotide.njs | 22 + .../blast/nucleotide/nucleotide.not | Bin 0 -> 248 bytes .../blast/nucleotide/nucleotide.nsq | Bin 0 -> 5674 bytes .../blast/nucleotide/nucleotide.ntf | Bin 0 -> 16384 bytes .../blast/nucleotide/nucleotide.nto | Bin 0 -> 84 bytes .../locidex_db/blast/protein/protein.fasta | 40 + .../locidex_db/blast/protein/protein.pdb | Bin 0 -> 20480 bytes .../locidex_db/blast/protein/protein.phr | Bin 0 -> 1290 bytes .../locidex_db/blast/protein/protein.pin | Bin 0 -> 280 bytes .../locidex_db/blast/protein/protein.pjs | 22 + .../locidex_db/blast/protein/protein.pot | Bin 0 -> 248 bytes .../locidex_db/blast/protein/protein.psq | Bin 0 -> 7568 bytes 
.../locidex_db/blast/protein/protein.ptf | Bin 0 -> 16384 bytes .../locidex_db/blast/protein/protein.pto | Bin 0 -> 84 bytes .../test_data/outputs/locidex_db/config.json | 12 + tests/test_data/outputs/locidex_db/meta.json | 455 +++++ .../test_data/outputs/locidex_db/results.json | 15 + .../outputs/merge/conservative/profile.tsv | 15 + .../profile_dists/allele_map.json | 78 + .../profile_dists/query_profile.text | 15 + .../profile_dists/ref_profile.text | 15 + .../conservative/profile_dists/results.text | 15 + .../merge/conservative/profile_dists/run.json | 38 + .../outputs/merge/normal/profile.tsv | 15 + .../normal/profile_dists/allele_map.json | 78 + .../normal/profile_dists/query_profile.text | 15 + .../normal/profile_dists/ref_profile.text | 15 + .../merge/normal/profile_dists/results.text | 15 + .../merge/normal/profile_dists/run.json | 38 + .../conservative/G1/nucleotide.hits.txt | 21 + .../report/conservative/G1/profile.json | 24 + .../report/conservative/G1/protein.hits.txt | 21 + .../conservative/G10/nucleotide.hits.txt | 20 + .../report/conservative/G10/profile.json | 24 + .../report/conservative/G10/protein.hits.txt | 20 + .../conservative/G11/nucleotide.hits.txt | 22 + .../report/conservative/G11/profile.json | 24 + .../report/conservative/G11/protein.hits.txt | 22 + .../conservative/G12/nucleotide.hits.txt | 22 + .../report/conservative/G12/profile.json | 24 + .../report/conservative/G12/protein.hits.txt | 22 + .../conservative/G13/nucleotide.hits.txt | 22 + .../report/conservative/G13/profile.json | 24 + .../report/conservative/G13/protein.hits.txt | 22 + .../conservative/G14/nucleotide.hits.txt | 22 + .../report/conservative/G14/profile.json | 24 + .../report/conservative/G14/protein.hits.txt | 22 + .../conservative/G2/nucleotide.hits.txt | 21 + .../report/conservative/G2/profile.json | 24 + .../report/conservative/G2/protein.hits.txt | 21 + .../conservative/G3/nucleotide.hits.txt | 21 + .../report/conservative/G3/profile.json | 24 + 
.../report/conservative/G3/protein.hits.txt | 21 + .../conservative/G4/nucleotide.hits.txt | 21 + .../report/conservative/G4/profile.json | 24 + .../report/conservative/G4/protein.hits.txt | 21 + .../conservative/G5/nucleotide.hits.txt | 21 + .../report/conservative/G5/profile.json | 24 + .../report/conservative/G5/protein.hits.txt | 21 + .../conservative/G6/nucleotide.hits.txt | 21 + .../report/conservative/G6/profile.json | 24 + .../report/conservative/G6/protein.hits.txt | 21 + .../conservative/G7/nucleotide.hits.txt | 21 + .../report/conservative/G7/profile.json | 24 + .../report/conservative/G7/protein.hits.txt | 21 + .../conservative/G8/nucleotide.hits.txt | 21 + .../report/conservative/G8/profile.json | 24 + .../report/conservative/G8/protein.hits.txt | 21 + .../conservative/G9/nucleotide.hits.txt | 20 + .../report/conservative/G9/profile.json | 24 + .../report/conservative/G9/protein.hits.txt | 20 + .../report/normal/G1/nucleotide.hits.txt | 21 + .../outputs/report/normal/G1/profile.json | 24 + .../outputs/report/normal/G1/protein.hits.txt | 21 + .../report/normal/G10/nucleotide.hits.txt | 20 + .../outputs/report/normal/G10/profile.json | 24 + .../report/normal/G10/protein.hits.txt | 20 + .../report/normal/G11/nucleotide.hits.txt | 22 + .../outputs/report/normal/G11/profile.json | 24 + .../report/normal/G11/protein.hits.txt | 22 + .../report/normal/G12/nucleotide.hits.txt | 22 + .../outputs/report/normal/G12/profile.json | 24 + .../report/normal/G12/protein.hits.txt | 22 + .../report/normal/G13/nucleotide.hits.txt | 22 + .../outputs/report/normal/G13/profile.json | 24 + .../report/normal/G13/protein.hits.txt | 22 + .../report/normal/G14/nucleotide.hits.txt | 22 + .../outputs/report/normal/G14/profile.json | 24 + .../report/normal/G14/protein.hits.txt | 22 + .../report/normal/G2/nucleotide.hits.txt | 21 + .../outputs/report/normal/G2/profile.json | 24 + .../outputs/report/normal/G2/protein.hits.txt | 21 + .../report/normal/G3/nucleotide.hits.txt | 21 + 
.../outputs/report/normal/G3/profile.json | 24 + .../outputs/report/normal/G3/protein.hits.txt | 21 + .../report/normal/G4/nucleotide.hits.txt | 21 + .../outputs/report/normal/G4/profile.json | 24 + .../outputs/report/normal/G4/protein.hits.txt | 21 + .../report/normal/G5/nucleotide.hits.txt | 21 + .../outputs/report/normal/G5/profile.json | 24 + .../outputs/report/normal/G5/protein.hits.txt | 21 + .../report/normal/G6/nucleotide.hits.txt | 21 + .../outputs/report/normal/G6/profile.json | 24 + .../outputs/report/normal/G6/protein.hits.txt | 21 + .../report/normal/G7/nucleotide.hits.txt | 21 + .../outputs/report/normal/G7/profile.json | 24 + .../outputs/report/normal/G7/protein.hits.txt | 21 + .../report/normal/G8/nucleotide.hits.txt | 21 + .../outputs/report/normal/G8/profile.json | 24 + .../outputs/report/normal/G8/protein.hits.txt | 21 + .../report/normal/G9/nucleotide.hits.txt | 20 + .../outputs/report/normal/G9/profile.json | 24 + .../outputs/report/normal/G9/protein.hits.txt | 20 + .../search/G1/blast/nucleotide/hsps.txt | 20 + .../search/G1/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G1/blast/protein/hsps.txt | 20 + .../search/G1/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G1/run.json | 27 + .../outputs/search/G1/seq_store.json | 1744 ++++++++++++++++ .../search/G10/blast/nucleotide/hsps.txt | 19 + .../search/G10/blast/nucleotide/queries.fasta | 38 + .../outputs/search/G10/blast/protein/hsps.txt | 19 + .../search/G10/blast/protein/queries.fasta | 38 + tests/test_data/outputs/search/G10/run.json | 27 + .../outputs/search/G10/seq_store.json | 1685 +++++++++++++++ .../search/G11/blast/nucleotide/hsps.txt | 21 + .../search/G11/blast/nucleotide/queries.fasta | 42 + .../outputs/search/G11/blast/protein/hsps.txt | 21 + .../search/G11/blast/protein/queries.fasta | 42 + tests/test_data/outputs/search/G11/run.json | 27 + .../outputs/search/G11/seq_store.json | 1801 +++++++++++++++++ .../search/G12/blast/nucleotide/hsps.txt | 21 + 
.../search/G12/blast/nucleotide/queries.fasta | 42 + .../outputs/search/G12/blast/protein/hsps.txt | 21 + .../search/G12/blast/protein/queries.fasta | 42 + tests/test_data/outputs/search/G12/run.json | 27 + .../outputs/search/G12/seq_store.json | 1801 +++++++++++++++++ .../search/G13/blast/nucleotide/hsps.txt | 21 + .../search/G13/blast/nucleotide/queries.fasta | 42 + .../outputs/search/G13/blast/protein/hsps.txt | 21 + .../search/G13/blast/protein/queries.fasta | 42 + tests/test_data/outputs/search/G13/run.json | 27 + .../outputs/search/G13/seq_store.json | 1801 +++++++++++++++++ .../search/G14/blast/nucleotide/hsps.txt | 21 + .../search/G14/blast/nucleotide/queries.fasta | 42 + .../outputs/search/G14/blast/protein/hsps.txt | 21 + .../search/G14/blast/protein/queries.fasta | 42 + tests/test_data/outputs/search/G14/run.json | 27 + .../outputs/search/G14/seq_store.json | 1801 +++++++++++++++++ .../search/G2/blast/nucleotide/hsps.txt | 20 + .../search/G2/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G2/blast/protein/hsps.txt | 20 + .../search/G2/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G2/run.json | 27 + .../outputs/search/G2/seq_store.json | 1744 ++++++++++++++++ .../search/G3/blast/nucleotide/hsps.txt | 20 + .../search/G3/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G3/blast/protein/hsps.txt | 20 + .../search/G3/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G3/run.json | 27 + .../outputs/search/G3/seq_store.json | 1744 ++++++++++++++++ .../search/G4/blast/nucleotide/hsps.txt | 20 + .../search/G4/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G4/blast/protein/hsps.txt | 20 + .../search/G4/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G4/run.json | 27 + .../outputs/search/G4/seq_store.json | 1744 ++++++++++++++++ .../search/G5/blast/nucleotide/hsps.txt | 20 + .../search/G5/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G5/blast/protein/hsps.txt | 20 + 
.../search/G5/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G5/run.json | 27 + .../outputs/search/G5/seq_store.json | 1744 ++++++++++++++++ .../search/G6/blast/nucleotide/hsps.txt | 20 + .../search/G6/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G6/blast/protein/hsps.txt | 20 + .../search/G6/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G6/run.json | 27 + .../outputs/search/G6/seq_store.json | 1744 ++++++++++++++++ .../search/G7/blast/nucleotide/hsps.txt | 20 + .../search/G7/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G7/blast/protein/hsps.txt | 20 + .../search/G7/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G7/run.json | 27 + .../outputs/search/G7/seq_store.json | 1744 ++++++++++++++++ .../search/G8/blast/nucleotide/hsps.txt | 20 + .../search/G8/blast/nucleotide/queries.fasta | 40 + .../outputs/search/G8/blast/protein/hsps.txt | 20 + .../search/G8/blast/protein/queries.fasta | 40 + tests/test_data/outputs/search/G8/run.json | 27 + .../outputs/search/G8/seq_store.json | 1744 ++++++++++++++++ .../search/G9/blast/nucleotide/hsps.txt | 19 + .../search/G9/blast/nucleotide/queries.fasta | 38 + .../outputs/search/G9/blast/protein/hsps.txt | 19 + .../search/G9/blast/protein/queries.fasta | 38 + tests/test_data/outputs/search/G9/run.json | 27 + .../outputs/search/G9/seq_store.json | 1685 +++++++++++++++ tests/test_workflows.yml | 211 +- 434 files changed, 32602 insertions(+), 7 deletions(-) create mode 100755 tests/test_data/genomes/G1.fasta create mode 100755 tests/test_data/genomes/G10.fasta create mode 100755 tests/test_data/genomes/G11.fasta create mode 100755 tests/test_data/genomes/G12.fasta create mode 100755 tests/test_data/genomes/G13.fasta create mode 100755 tests/test_data/genomes/G14.fasta create mode 100755 tests/test_data/genomes/G2.fasta create mode 100755 tests/test_data/genomes/G3.fasta create mode 100755 tests/test_data/genomes/G4.fasta create mode 100755 
tests/test_data/genomes/G5.fasta create mode 100755 tests/test_data/genomes/G6.fasta create mode 100755 tests/test_data/genomes/G7.fasta create mode 100755 tests/test_data/genomes/G8.fasta create mode 100755 tests/test_data/genomes/G9.fasta create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.fasta create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.ndb create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.nhr create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.nin create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.njs create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.not create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.nsq create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.ntf create mode 100755 tests/test_data/outputs/db/blast/nucleotide/nucleotide.nto create mode 100755 tests/test_data/outputs/db/blast/protein/protein.fasta create mode 100755 tests/test_data/outputs/db/blast/protein/protein.pdb create mode 100755 tests/test_data/outputs/db/blast/protein/protein.phr create mode 100755 tests/test_data/outputs/db/blast/protein/protein.pin create mode 100755 tests/test_data/outputs/db/blast/protein/protein.pjs create mode 100755 tests/test_data/outputs/db/blast/protein/protein.pot create mode 100755 tests/test_data/outputs/db/blast/protein/protein.psq create mode 100755 tests/test_data/outputs/db/blast/protein/protein.ptf create mode 100755 tests/test_data/outputs/db/blast/protein/protein.pto create mode 100755 tests/test_data/outputs/db/config.json create mode 100755 tests/test_data/outputs/db/meta.json create mode 100755 tests/test_data/outputs/db/results.json create mode 100755 tests/test_data/outputs/extract/G1/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.ndb 
create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G1/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G1/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G1/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G1/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G10/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G10/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G10/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G10/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G10/seq_data.txt create mode 100755 
tests/test_data/outputs/extract/G11/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G11/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G11/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G11/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G11/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G12/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G12/filtered.hsps.txt create mode 100755 
tests/test_data/outputs/extract/G12/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G12/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G12/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G13/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G13/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G13/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G13/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G13/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G14/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nsq create mode 100755 
tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G14/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G14/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G14/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G14/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G2/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G2/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G2/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G2/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G2/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G3/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nin create mode 100755 
tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G3/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G3/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G3/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G3/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G4/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G4/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G4/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G4/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G4/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G5/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta create mode 100755 
tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G5/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G5/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G5/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G5/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G6/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G6/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G6/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G6/raw.extracted.seqs.fasta create mode 100755 
tests/test_data/outputs/extract/G6/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G7/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G7/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G7/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G7/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G7/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G8/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nto create mode 100755 
tests/test_data/outputs/extract/G8/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G8/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G8/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G8/seq_data.txt create mode 100755 tests/test_data/outputs/extract/G9/blast/hsps.txt create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.ndb create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nhr create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nin create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.njs create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.not create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nsq create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.ntf create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nto create mode 100755 tests/test_data/outputs/extract/G9/filtered.hsps.txt create mode 100755 tests/test_data/outputs/extract/G9/processed.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G9/raw.extracted.seqs.fasta create mode 100755 tests/test_data/outputs/extract/G9/seq_data.txt create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.fasta create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ndb create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nhr create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nin create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.njs create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.not create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nsq create mode 
100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ntf create mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nto create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.fasta create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pdb create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.phr create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pin create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pjs create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pot create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.psq create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.ptf create mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pto create mode 100755 tests/test_data/outputs/locidex_db/config.json create mode 100755 tests/test_data/outputs/locidex_db/meta.json create mode 100755 tests/test_data/outputs/locidex_db/results.json create mode 100755 tests/test_data/outputs/merge/conservative/profile.tsv create mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json create mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text create mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text create mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/results.text create mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/run.json create mode 100755 tests/test_data/outputs/merge/normal/profile.tsv create mode 100755 tests/test_data/outputs/merge/normal/profile_dists/allele_map.json create mode 100755 tests/test_data/outputs/merge/normal/profile_dists/query_profile.text create mode 100755 tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text create mode 100755 
tests/test_data/outputs/merge/normal/profile_dists/results.text create mode 100755 tests/test_data/outputs/merge/normal/profile_dists/run.json create mode 100755 tests/test_data/outputs/report/conservative/G1/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G1/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G1/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G10/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G10/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G10/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G11/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G11/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G11/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G12/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G12/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G12/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G13/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G13/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G13/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G14/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G14/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G14/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G2/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G2/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G2/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G3/nucleotide.hits.txt create mode 100755 
tests/test_data/outputs/report/conservative/G3/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G3/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G4/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G4/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G4/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G5/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G5/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G5/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G6/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G6/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G6/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G7/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G7/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G7/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G8/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G8/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G8/protein.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G9/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/conservative/G9/profile.json create mode 100755 tests/test_data/outputs/report/conservative/G9/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G1/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G1/profile.json create mode 100755 tests/test_data/outputs/report/normal/G1/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G10/nucleotide.hits.txt create mode 100755 
tests/test_data/outputs/report/normal/G10/profile.json create mode 100755 tests/test_data/outputs/report/normal/G10/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G11/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G11/profile.json create mode 100755 tests/test_data/outputs/report/normal/G11/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G12/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G12/profile.json create mode 100755 tests/test_data/outputs/report/normal/G12/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G13/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G13/profile.json create mode 100755 tests/test_data/outputs/report/normal/G13/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G14/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G14/profile.json create mode 100755 tests/test_data/outputs/report/normal/G14/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G2/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G2/profile.json create mode 100755 tests/test_data/outputs/report/normal/G2/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G3/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G3/profile.json create mode 100755 tests/test_data/outputs/report/normal/G3/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G4/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G4/profile.json create mode 100755 tests/test_data/outputs/report/normal/G4/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G5/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G5/profile.json create mode 100755 tests/test_data/outputs/report/normal/G5/protein.hits.txt create mode 
100755 tests/test_data/outputs/report/normal/G6/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G6/profile.json create mode 100755 tests/test_data/outputs/report/normal/G6/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G7/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G7/profile.json create mode 100755 tests/test_data/outputs/report/normal/G7/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G8/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G8/profile.json create mode 100755 tests/test_data/outputs/report/normal/G8/protein.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G9/nucleotide.hits.txt create mode 100755 tests/test_data/outputs/report/normal/G9/profile.json create mode 100755 tests/test_data/outputs/report/normal/G9/protein.hits.txt create mode 100755 tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G1/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G1/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G1/run.json create mode 100755 tests/test_data/outputs/search/G1/seq_store.json create mode 100755 tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G10/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G10/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G10/run.json create mode 100755 tests/test_data/outputs/search/G10/seq_store.json create mode 100755 tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta create mode 100755 
tests/test_data/outputs/search/G11/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G11/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G11/run.json create mode 100755 tests/test_data/outputs/search/G11/seq_store.json create mode 100755 tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G12/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G12/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G12/run.json create mode 100755 tests/test_data/outputs/search/G12/seq_store.json create mode 100755 tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G13/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G13/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G13/run.json create mode 100755 tests/test_data/outputs/search/G13/seq_store.json create mode 100755 tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G14/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G14/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G14/run.json create mode 100755 tests/test_data/outputs/search/G14/seq_store.json create mode 100755 tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G2/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G2/blast/protein/queries.fasta create mode 100755 
tests/test_data/outputs/search/G2/run.json create mode 100755 tests/test_data/outputs/search/G2/seq_store.json create mode 100755 tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G3/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G3/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G3/run.json create mode 100755 tests/test_data/outputs/search/G3/seq_store.json create mode 100755 tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G4/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G4/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G4/run.json create mode 100755 tests/test_data/outputs/search/G4/seq_store.json create mode 100755 tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G5/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G5/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G5/run.json create mode 100755 tests/test_data/outputs/search/G5/seq_store.json create mode 100755 tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G6/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G6/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G6/run.json create mode 100755 tests/test_data/outputs/search/G6/seq_store.json create mode 100755 tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt create 
mode 100755 tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G7/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G7/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G7/run.json create mode 100755 tests/test_data/outputs/search/G7/seq_store.json create mode 100755 tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G8/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G8/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G8/run.json create mode 100755 tests/test_data/outputs/search/G8/seq_store.json create mode 100755 tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G9/blast/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G9/blast/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G9/run.json create mode 100755 tests/test_data/outputs/search/G9/seq_store.json diff --git a/locidex/classes/extractor.py b/locidex/classes/extractor.py index 7a887f7..2bc941c 100644 --- a/locidex/classes/extractor.py +++ b/locidex/classes/extractor.py @@ -25,7 +25,6 @@ def __init__(self,df,seq_data,sseqid_col,queryid_col,qstart_col,qend_col,qlen_co self.df = self.recursive_filter_redundant_queries(self.df, 'locus_name', sseqid_col, bitscore_col, sort_cols, ascending_cols, overlap_threshold=1) - self.df = self.extend(self.df,sseqid_col, queryid_col, qstart_col, qend_col, sstart_col,send_col,slen_col, qlen_col, bitscore_col, overlap_threshold=overlap_thresh) self.df = self.set_extraction_pos(self.df, sstart_col, send_col) loci_ranges = self.group_by_locus(self.df,sseqid_col, 
queryid_col,qlen_col,extend_threshold_ratio) @@ -413,7 +412,6 @@ def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send df = df.sort_values(sort_cols, ascending=ascending_cols).reset_index(drop=True) queries = df[queryid_col].to_list() - #Remove incomplete hits when complete ones are present filtered = [] for query in queries: @@ -425,10 +423,6 @@ def extend(self,df,seqid_col, queryid_col, qstart_col, qend_col, sstart_col,send else: filtered.append(subset) df = pd.concat(filtered, ignore_index=True) - trunc_records = df[df['is_complete'] == False] - if len(trunc_records) == 0: - return df - is_extended = [] five_p_ext = [] three_p_ext = [] diff --git a/tests/test_data/genomes/G1.fasta b/tests/test_data/genomes/G1.fasta new file mode 100755 index 0000000..94ea845 --- /dev/null +++ b/tests/test_data/genomes/G1.fasta @@ -0,0 +1,2 @@ +>G1 +NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtga
ccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgt
tcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcga
atacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtg
gtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctc
tgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagc
caaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcc
tggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcg
ttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccg
attggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNN
NNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgc
ctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_data/genomes/G10.fasta b/tests/test_data/genomes/G10.fasta new file mode 100755 index 0000000..2f4eb03 --- /dev/null +++ b/tests/test_data/genomes/G10.fasta @@ -0,0 +1,2 @@ +>G10 
+NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggag-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------aNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaa
gcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgc
tcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNN----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git 
a/tests/test_data/genomes/G11.fasta b/tests/test_data/genomes/G11.fasta new file mode 100755 index 0000000..38e46ff --- /dev/null +++ b/tests/test_data/genomes/G11.fasta @@ -0,0 +1,2 @@ +>G11 +NNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatNNNNNNNNNNNtcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttcc
agatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatNNNNNNNNNNNctagccgggaacaaattcaccgtctaaaaagagttttccgttttccgcatacataacgagcgcattaagatagtccggatcaaacttcacgccgcgcttattttttgcaatatacgtcaaacaatgattatgggtaaaaatgactatatttttattctgcgactttttcagtaacgtattgattgaagcataaataccgctgccgcaatccatcatttttttatccgccgtaagcgacctgcctgcggaaaaccaggttgccgactggatggtgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctgacaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctccaggcatgttgtgaggccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacNNNNNNNNNNNttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatNNNNNNNNNNNttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgagttgctcttcatcttctttcgaacgcatgtattgtggatgttcctggaagaaggtaagcgcctgttctttggtttgtttatatttttcgcaaaaaatgcttgagctgattgcgctattttttgatgcggaattatcagcgttctggtttgataatgatttattcttcgcaaggtctgacggcacatacggaag
agactgacactcatcaatactatttgcgttggccagttgctctttctgagcgccaggttgctgtaccggtttgctcacggaggaagggaggggcacctgggcacagccgatcagtaaaaagacaggaagacagctataaaattttttcatNNNNNNNNNNNttagtgcgcttttacctgcctgaaccagtaattttccattttcgttatccatttcccctttttatttttcggtattacgccagcccaaagtaattgcagctgtcggttataggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtccttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactgtacgttcagtttcacacatttttccccgcagccttctactggcggcaatattgtcatggtataaggcgggttatctgcagtggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggcggcagaaaatagcgcgagcaaataaaaaggtattagtttcagNNNNNNNNNNNttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatNNNNNNNNNNNttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataac
gaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacgg
tagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatNNNNNNNNNNNttagccctgatgcggcatcaattccgggtggccttgtaccggcggcttgttgctggtcagcgcggcttcatcagcctgaatgctgccggaattggccgcccatacgccttcatgcgtgtgggtgatc
tgctgatgctgcgcattcaaatcctcgcccattgccgcaatatgcgtgctttccgtaccgccgctgttagtcgcccaggggatgacgggatcgctggcaaatgccatgccggaaatcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatNNNNNNNNNNNttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggtaatttcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgggcaatagtttccgcgtcgccgtcgaactgccagttattgccggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcacgcataaagaacaggatctcgccgagcttgccggtctcctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgatctctgcgctgacgccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgttcacttttacgcctgcgttgcgcatggtatccagtttttgctgacaaacttccgtaccgccctgaatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtccagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacggccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgacggtggcc
tccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcaatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatNNNNNNNNNNNctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggt
acgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatNNNNNNNNNNNttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatNNNNNNNNNNNttattcctctt
tctgtgtgggatgctgtcggccaaaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtggggtatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgctgataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttagggtcgtgcggtatttgttataacgttcgcgctcgcttttgagcagttgattaaggttgcgcacaaggctggtcagctcacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgccagcgcctcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagcgttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaaccattccggttgaatgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcagcgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgctgattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcaccagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatNNNNNNNNNNNtcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtt
tttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacNNNNNNNNNNNctagatcacgtattcgatcaacgctggttcttgtttacagaggcgacgccagtcgacaatcggcattcgtacctgcagactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggatttcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagcagcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtttcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggtttcgagctgggtttcacccggaccgcgcaaaccaatcccgcctttctgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgttgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaa
ctgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcggaagagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaaNNNNNNNNNNNtcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatNNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatac
gaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNtcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccagacgcagcgtatgttcgctggactcgcgcatcagcaggccgtaaatcaatccgccaaccatggaatcgcctgcgccgacggtacttaccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccacagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgtgcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctttaagaccggcgactaacgcttcacggctactatcaaagataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgctgacgcagaccatatcgaactgacccagccagctcagggagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccggaaaagttgaagtcggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcactttggcaacgttaatgcctttgcccgccgcgtgcagacccgtggttttcaccaggttcacNNNNNNNNNNNttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatNNNNNNNNNNNttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctg
ttatcatcagcgcttcaagaaacggtggatcgtacatNNNNNNNNNNN------------------------------------------------------------------------------------------------------------------------------ diff --git a/tests/test_data/genomes/G12.fasta b/tests/test_data/genomes/G12.fasta new file mode 100755 index 0000000..826cb3b --- /dev/null +++ b/tests/test_data/genomes/G12.fasta @@ -0,0 +1,2 @@ +>G12 +NNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggc
atgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatNNNNNNNNNNNtcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatNNNNNNNNNNNctagccgggaacaaattcaccgtctaaaaggagttttccgttttccgcatacataacgagcgcattaagatactccggatcaaacttcacgccgcgcttattttttacaatatacgtcaaacaatgattatgggttaaaatgactatatttttaatctgcgaatttttcagtaacgtattgattgaagaataaataccgctgccgcaatccatcatttttttaaccgccctacgcgacctgcctgccgaaaaccaggttgccgactggatggcgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctggcaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctacaggcatgttgtgagtccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacNNNNNNNNNNNttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatNNNNNNNNNNNttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgtgttgctc
ttcaacttctttcgaacgcatgtattgtggaagttcctggaagaaggtaagcgcctgttctttggtttgcttatatttttcgcaaaaaatgcgtgagctgattgcgctattttttgatgcggtattatcagcgatctggtttgataatgatttattcttcgcaaggtctgaaggcacatacggaagtgactgacactcatcaataccatttgcgttggccagttgctctttctgagcgctaggttgctgtaccggtttgctcacggaggaacggagaggcacctgggcacagccgatcagtaaaaagacaggatgacagctatcaaattttttcatNNNNNNNNNNNttagtgcgcttttacccgcctgaaccagtaattttccatcttcgttatccatttccactttttatttttcggtattacgccagccctaagtaattgcagctgtcggttgtaggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtcgttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactctacgttcagtttcacacatttttccccgcaggcttctactggcggcaatattgtcacggtataaggcgggttatctgcaggggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggaggcagaaaatagcgcgagcaaataaaaaggtattagtttcagNNNNNNNNNNNttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatNNNNNNNNNNNttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgc
gtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttga
tcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcc
cgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatNNNNNNNNNNNttagccctgatgcggcaacaattccggttggacttgtaccggtggcttgttgctggtcagcgcggcttcatcagcctgaatggtgccggaatttgcggcacatacgccttcatgcgtgtggatgatgtgctgatgctgcgcattcacatcctcgcccattgccgcgatatgcgtgctttccgtaccgccactgttagtcgcccaggggatcacggtatcgctggcaaatcccatgccggaagtcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatNNNNNNNNNNNttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggaaatctcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgtgcaatagtttccgcgtcgccgtcgaactgccagtcattgctggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcccgcataaagaacaggatctcgccgagcttgccggtcttctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgagctctgcgctgaccccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgtacacttttacgcctgcgttgcgcatgttatccagtttttgctgacaaacttccgtaccgccctgtatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtcaagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacgaccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcag
ccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgactgtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcgatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatNNNNNNNNNNNctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgacc
ttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatNNNNNNNNNNNttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgac
aacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatNNNNNNNNNNNttattcctctttctgtgtgggatgctgtcggccagaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtgggctatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgcagataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttaggctcgtgcggtatttgttataacgttcgtgctcgcttttgagcagttgattaaggttgcgcacaaggctgatcagcttacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgacagcgcatcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagccttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaacccttccggttgagtgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcaccgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgcagattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcacgagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatNNNNNNNNNNNtcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtct
gccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacNNNNNNNNNNNctagatcacgtattcgatcaacgctggttcttgtttacagaagcgacgccagtcgacaatcggcattcgtacctgcggactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggaattcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagctgcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtctcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggcttcgagctgggtttcacccggaccgcgcgaaccaatcccgcctttccgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgcca
gctcaacctgcaacttaccttcatgggtacgcgcacgctgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcgtaatagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaaNNNNNNNNNNNtcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatNNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacac
gacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNtcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccaggcgcagcgtatgttcggtggactcgcgcatcagcaggccgtaaatccatccgccaaccatggaaccgcctgcgccgacggtattttccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccgcagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgggcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctataagaccggcgactaactctacacggctagtatcaacgataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgcttacgcagaccatatcgaactgacccagccagctcaggaagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccgggaaagttgaagttggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcacattggcaacgttaatgcctttgcccgccgcgtgcagacccggggttttcaccaggttcacNNNNNNNNNNNttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcgg
ttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatNNNNNNNNNNNttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatNNNNNNNNNNN------------------------------------------------------------------------------------------------------------------------------ diff --git a/tests/test_data/genomes/G13.fasta b/tests/test_data/genomes/G13.fasta new file mode 100755 index 0000000..aeaf668 --- /dev/null +++ b/tests/test_data/genomes/G13.fasta @@ -0,0 +1,2 @@ +>G13 +NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattg
atgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgca
ggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgc
gacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaa
acgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaat
gaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacc
tgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacg
ccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctgg
aagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaa
ggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaa
cttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgat
ccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattga
tttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNN diff --git a/tests/test_data/genomes/G14.fasta b/tests/test_data/genomes/G14.fasta new file mode 100755 index 0000000..1a95091 --- /dev/null +++ b/tests/test_data/genomes/G14.fasta @@ -0,0 +1,2 @@ +>G14 +NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcg
tgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcct
ggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcg
cggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagctta
cgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatata
gagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccg
caaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgg
gcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgc
tgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgt
tttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgat
aagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccag
tcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgt
tcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNN diff --git a/tests/test_data/genomes/G2.fasta b/tests/test_data/genomes/G2.fasta new file mode 100755 index 0000000..92219fd --- /dev/null +++ b/tests/test_data/genomes/G2.fasta @@ -0,0 +1,2 @@ +>G2 +NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttca
ggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcag
atggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcag
gtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccact
cggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactca
ctggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacct
ggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaat
atctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcg
gctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcg
cgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcg
cctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagc
atccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNN---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_data/genomes/G3.fasta b/tests/test_data/genomes/G3.fasta new file mode 100755 index 0000000..1cc4b5c --- /dev/null +++ b/tests/test_data/genomes/G3.fasta @@ -0,0 +1,2 @@ +>G3 
+NNNNNNNNNNNctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatNNNNNNNNNNNtcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatNNNNNNNNNNNctagccgggaacaaattcaccgtctaaaaagagttttccgttttccgcatacataacgagcgcattaagatagtccggatcaaacttcacgccgcgcttattttttgcaatatacgtcaaacaatgattatgggtaaaaatgactatatttttattctgcgactttttcagtaacgtattgattgaagcataaataccgctgccgcaatccatcatttttttatccgccgtaagcgacctgcctgcggaaaaccaggttgccgactggatggtgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctgacaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctccaggcatgttgtgaggccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacNNNNNNNNNNNttaaaccgcttgttcaaggctga
aactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatNNNNNNNNNNNttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgagttgctcttcatcttctttcgaacgcatgtattgtggatgttcctggaagaaggtaagcgcctgttctttggtttgtttatatttttcgcaaaaaatgcttgagctgattgcgctattttttgatgcggaattatcagcgttctggtttgataatgatttattcttcgcaaggtctgacggcacatacggaagagactgacactcatcaatactatttgcgttggccagttgctctttctgagcgccaggttgctgtaccggtttgctcacggaggaagggaggggcacctgggcacagccgatcagtaaaaagacaggaagacagctataaaattttttcatNNNNNNNNNNNttagtgcgcttttacctgcctgaaccagtaattttccattttcgttatccatttcccctttttatttttcggtattacgccagcccaaagtaattgcagctgtcggttataggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtccttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactgtacgttcagtttcacacatttttccccgcagccttctactggcggcaatattgtcatggtataaggcgggttatctgcagtggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggcggcagaaaatagcgcgagcaaataaaaaggtattagtttcagNNNNNNNNNNNttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatNNNNNNNNNNNttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgc
tctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattc
aatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgacc
ggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatNNNNNNNNNNNttagccctgatgcggcatcaattccgggtggccttgtaccggcggcttgttgctggtcagcgcggcttcatcagcctgaatgctgccggaattggccgcccatacgccttcatgcgtgtgggtgatctgctgatgctgcgcattcaaatcctcgcccattgccgcaatatgcgtgctttccgtaccgccgctgttagtcgcccaggggatgacgggatcgctggcaaatgccatgccggaaatcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatNNNNNNNNNNNttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggtaatttcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgggcaatagtttccgcgtcgccgtcgaactgccagttattgccggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcacgcataaagaacaggatctcgccgagcttgccggtctcctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgatctctgcgctgacgccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcag
aggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgttcacttttacgcctgcgttgcgcatggtatccagtttttgctgacaaacttccgtaccgccctgaatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtccagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacggccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgacggtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcaatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatNNNNNNNNNNNctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccag
atgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatNNNNNNNNNNNttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcg
accaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatNNNNNNNNNNNttattcctctttctgtgtgggatgctgtcggccaaaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtggggtatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgctgataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttagggtcgtgcggtatttgttataacgttcgcgctcgcttttgagcagttgattaaggttgcgcacaaggctggtcagctcacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgccagcgcctcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagcgttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaaccattccggttgaatgcttttaatcagccaggggatgttgcgctgcgtccataataatttg
cccgtttcatcgtaaatcagcgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgctgattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcaccagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatNNNNNNNNNNNtcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacNNNNNNNNNNNctagatcacgtattcgatcaacgctggttcttgtttacagaggcgacgccagtcgacaatcggcattcgtacctgcagactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgc
ggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggatttcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagcagcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtttcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggtttcgagctgggtttcacccggaccgcgcaaaccaatcccgcctttctgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgttgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcggaagagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaaNNNNNNNNNNNtcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccaggg
agaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatNNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNtcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccagacgcagcgtatgttcgctggactcgcgcatcagcaggccgtaaatcaatccgccaaccatggaatcgcctgcgccgacggtacttaccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccacagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgtgcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctttaagaccggcgactaacgcttcacggctactatcaaagataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgctgacgcagaccatatcgaactgacccagccagctcagggagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccggaaaagttgaagtcggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcactttggcaacgttaatgcctttgcccgccgcgtgcagacccgtggttttcaccaggttcacNNNNNNNNNNNttaccattgcgtg
ccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatNNNNNNNNNNNttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatNNNNNNNNNNN----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git 
a/tests/test_data/genomes/G4.fasta b/tests/test_data/genomes/G4.fasta new file mode 100755 index 0000000..9dc612d --- /dev/null +++ b/tests/test_data/genomes/G4.fasta @@ -0,0 +1,2 @@ +>G4 +NNNNNNNNNNNctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatNNNNNNNNNNNtcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatNNNNNNNNNNNctagccgggaacaaattcaccgtctaaaaggagttttccgttttccgcatacataacgagcgcattaagatactccggatcaaacttcacgccgcgcttattttttacaatatacgtcaaacaatgattatgggttaaaatgactatatttttaatctgcgaatttttcagtaacgtattgattgaagaataaataccgctgccgcaatccatcatttttttaaccgccctacgcgacctgcctgccgaaaaccaggttgccgactggatggcgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctggcaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgct
tcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctacaggcatgttgtgagtccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacNNNNNNNNNNNttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatNNNNNNNNNNNttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgtgttgctcttcaacttctttcgaacgcatgtattgtggaagttcctggaagaaggtaagcgcctgttctttggtttgcttatatttttcgcaaaaaatgcgtgagctgattgcgctattttttgatgcggtattatcagcgatctggtttgataatgatttattcttcgcaaggtctgaaggcacatacggaagtgactgacactcatcaataccatttgcgttggccagttgctctttctgagcgctaggttgctgtaccggtttgctcacggaggaacggagaggcacctgggcacagccgatcagtaaaaagacaggatgacagctatcaaattttttcatNNNNNNNNNNNttagtgcgcttttacccgcctgaaccagtaattttccatcttcgttatccatttccactttttatttttcggtattacgccagccctaagtaattgcagctgtcggttgtaggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtcgttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactctacgttcagtttcacacatttttccccgcaggcttctactggcggcaatattgtcacggtataaggcgggttatctgcaggggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggaggcagaaaatagcgcgagcaaataaaaaggtattagtttcagNNNNNNNNNNNttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatNNNNNNNNNNNttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgta
catcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatga
tattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgacta
tggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatNNNNNNNNNNNttagccctgatgcggcaacaattccggttggacttgtaccggtggcttgttgctggtcagcgcggcttcatcagcctgaatggtgccggaatttgcggcacatacgccttcatgcgtgtggatgatgtgctgatgctgcgcattcacatcctcgcccattgccgcgatatgcgtgctttccgtaccgccactgttagtcgcccaggggatcacggtatcgctggcaaatcccatgccggaagtcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatNNNNNNNNNNNttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggaaatctcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgtgcaatagtttccgcgtcgccgtcgaactgccagtcattgctggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcccgcataaagaacaggatctcgccgagcttgccggtcttctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgagctctgcgctgaccccgttgtgcagatacgtgtcggtcggcagatattcgc
cgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgtacacttttacgcctgcgttgcgcatgttatccagtttttgctgacaaacttccgtaccgccctgtatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtcaagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacgaccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgactgtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcgatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatNNNNNNNNNNNctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggttt
cccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatNNNNNNNNNNNttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtc
gacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatNNNNNNNNNNNttattcctctttctgtgtgggatgctgtcggccagaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtgggctatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgcagataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttaggctcgtgcggtatttgttataacgttcgtgctcgcttttgagcagttgattaaggttgcgcacaaggctgatcagcttacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgacagcgcatcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagccttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtggg
tcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaacccttccggttgagtgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcaccgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgcagattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcacgagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatNNNNNNNNNNNtcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacNNNNNNNNNNNctagatcacgtattcgatcaacgctggttcttgtttacagaagcgacgccagtcgacaatcggcattcgtacctgcgg
actgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggaattcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagctgcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtctcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggcttcgagctgggtttcacccggaccgcgcgaaccaatcccgcctttccgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgctgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcgtaatagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaaNNNNNNNNNNNtcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgcca
ggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatNNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNtcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccaggcgcagcgtatgttcggtggactcgcgcatcagcaggccgtaaatccatccgccaaccatggaaccgcctgcgccgacggtattttccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccgcagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgggcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctataagaccggcgactaactctacacggctagtatcaacgataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgcttacgcagaccatatcgaactgacccagccagctcaggaagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccgggaaagttgaagttggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaag
cggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcacattggcaacgttaatgcctttgcccgccgcgtgcagacccggggttttcaccaggttcacNNNNNNNNNNNttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatNNNNNNNNNNNttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatNNNNNNNNNNN---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_data/genomes/G5.fasta b/tests/test_data/genomes/G5.fasta new file mode 100755 index 0000000..f469e79 --- /dev/null +++ b/tests/test_data/genomes/G5.fasta @@ -0,0 +1,2 @@ +>G5 +NNNNNNNNNNNatgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagc
gaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcgg
aagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcg
cgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgtt
agataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaa
gtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccgga
tggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaa
cgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctc
aacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgcc
agccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaa
aaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatg
atttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNN-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_data/genomes/G6.fasta b/tests/test_data/genomes/G6.fasta new file mode 100755 index 0000000..888c314 --- /dev/null +++ b/tests/test_data/genomes/G6.fasta @@ -0,0 +1,2 @@ +>G6 +NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaaNNNNNNNNNNNgtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccg
cgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcga
atggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgc
cagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcg
gttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgag
caggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgat
ggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgc
acccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaac
cagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaa
ggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtt
tggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatctt
aatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNN-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ diff --git a/tests/test_data/genomes/G7.fasta b/tests/test_data/genomes/G7.fasta new file mode 100755 index 0000000..6d5d1dc --- /dev/null +++ b/tests/test_data/genomes/G7.fasta @@ -0,0 +1,2 @@ +>G7 +NNNNNNNNNNNctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatNNNNNNNNNNNtcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgc
tcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatNNNNNNNNNNNctagccgggaacaaattcaccgtctaaaaagagttttccgttttccgcatacataacgagcgcattaagatagtccggatcaaacttcacgccgcgcttattttttgcaatatacgtcaaacaatgattatgggtaaaaatgactatatttttattctgcgactttttcagtaacgtattgattgaagcataaataccgctgccgcaatccatcatttttttatccgccgtaagcgacctgcctgcggaaaaccaggttgccgactggatggtgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctgacaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctccaggcatgttgtgaggccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacNNNNNNNNNNNttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatNNNNNNNNNNNttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgagttgctcttcatcttctttcgaacgcatgtattgtggatgttcctggaagaaggtaagcgcctgttctttggtttgtttatatttttcgcaaaaaatgcttgagctgattgcgctattttttgatgcggaattatcagcgttctggtttgataatgatttattcttcgcaaggtctgacggcacatacggaagagactgacactcatcaatactatttgcgttggccagttgctctttctgagcgccaggttgctgtaccggtttgctcacggaggaagggaggggcacctgggcacagccgatcagtaaaaagacaggaagacagctataaaattttttcatNNNNNNNNNNNttagtgcgcttttacctgcctgaaccagtaattttccattttcgttatccatttcccctttttatttttcggtattacgccagcccaaagtaattgcagctgtcggttataggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtccttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactgtacgttcagtttcacacatttttccccgcagccttctactggc
ggcaatattgtcatggtataaggcgggttatctgcagtggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggcggcagaaaatagcgcgagcaaataaaaaggtattagtttcagNNNNNNNNNNNttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatNNNNNNNNNNNttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgct
gctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcata
aagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatNNNNNNNNNNNttagccctgatgcggcatcaattccgggtggccttgtaccggcggcttgttgctggtcagcgcggcttcatcagcctgaatgctgccggaattggccgcccatacgccttcatgcgtgtgggtgatctgctgatgctgcgcattcaaatcctcgcccattgccgcaatatgcgtgctttccgtaccgccgctgttagtcgcccaggggatgacgggatcgctggcaaatgccatgccggaaatcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatNNNNNNNNNNNttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggtaatttcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcg
gacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgggcaatagtttccgcgtcgccgtcgaactgccagttattgccggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcacgcataaagaacaggatctcgccgagcttgccggtctcctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgatctctgcgctgacgccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgttcacttttacgcctgcgttgcgcatggtatccagtttttgctgacaaacttccgtaccgccctgaatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtccagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacggccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgacggtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcaatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatNNNNNNNNNNNctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaa
ccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatNNNNNNNNNNNttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatca
caaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatNNNNNNNNNNNttattcctctttctgtgtgggatgctgtcggccaaaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtggggtatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgctgataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgca
gttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttagggtcgtgcggtatttgttataacgttcgcgctcgcttttgagcagttgattaaggttgcgcacaaggctggtcagctcacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgccagcgcctcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagcgttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaaccattccggttgaatgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcagcgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgctgattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcaccagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatNNNNNNNNNNNtcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcgg
agcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacNNNNNNNNNNNctagatcacgtattcgatcaacgctggttcttgtttacagaggcgacgccagtcgacaatcggcattcgtacctgcagactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggatttcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagcagcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtttcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggtttcgagctgggtttcacccggaccgcgcaaaccaatcccgcctttctgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgttgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcggaagagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaaNNNNNNNNNNNtcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccgg
cggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatNNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatNNNNNNNNNNNtcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccagacgcagcgtatgttcgctggactcgcgcatcagcagg
ccgtaaatcaatccgccaaccatggaatcgcctgcgccgacggtacttaccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccacagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgtgcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctttaagaccggcgactaacgcttcacggctactatcaaagataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgctgacgcagaccatatcgaactgacccagccagctcagggagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccggaaaagttgaagtcggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcactttggcaacgttaatgcctttgcccgccgcgtgcagacccgtggttttcaccaggttcacNNNNNNNNNNNttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcccNNNNNNNNNNNttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatNNNNNNNNNNN---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_data/genomes/G8.fasta b/tests/test_data/genomes/G8.fasta new file mode 100755 index 0000000..f13b746 --- /dev/null +++ b/tests/test_data/genomes/G8.fasta @@ -0,0 +1,2 @@ +>G8 +NNNNNNNNNNNctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatNNNNNNNNNNNtcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcg
cgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatNNNNNNNNNNNctagccgggaacaaattcaccgtctaaaaggagttttccgttttccgcatacataacgagcgcattaagatactccggatcaaacttcacgccgcgcttattttttacaatatacgtcaaacaatgattatgggttaaaatgactatatttttaatctgcgaatttttcagtaacgtattgattgaagaataaataccgctgccgcaatccatcatttttttaaccgccctacgcgacctgcctgccgaaaaccaggttgccgactggatggcgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctggcaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctacaggcatgttgtgagtccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacNNNNNNNNNNNttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatNNNNNNNNNNNttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgtgttgctcttcaacttctttcgaacgcatgtattgtggaagttcctggaagaaggtaagcgcctgttctttggtttgcttatatttttcgcaaaaaatgcgtgagctgattgcgctattttttgatgcggtattatcagcgatctggtttgataatgatttattcttcgcaaggtctgaaggcacatacggaagtgactgacactcatcaataccatttgcgttggccagttgctctttctgagcgctaggttgctgtaccggtttgctcacggaggaacggagaggcacctgggcacagccgatcagtaaaaagacaggatgacagctatcaaattttttcatNNNNNNNNNNNttagtgcgcttttacccgcctgaaccagtaattttccatcttcgttatccatttccactttttatttttcggtattacgccagccctaagtaattgca
gctgtcggttgtaggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtcgttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactctacgttcagtttcacacatttttccccgcaggcttctactggcggcaatattgtcacggtataaggcgggttatctgcaggggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggaggcagaaaatagcgcgagcaaataaaaaggtattagtttcagNNNNNNNNNNNttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatNNNNNNNNNNNttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaatacc
gagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaact
ggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatNNNNNNNNNNNttagccctgatgcggcaacaattccggttggacttgtaccggtggcttgttgctggtcagcgcggcttcatcagcctgaatggtgccggaatttgcggcacatacgccttcatgcgtgtggatgatgtgctgatgctgcgcattcacatcctcgcccattgccgcgatatgcgtgctttccgtaccgccactgttagtcgcccaggggatcacggtatcgctggcaaatcccatgccggaagtcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatNNNNNNNNNNNttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacgga
aatctcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgtgcaatagtttccgcgtcgccgtcgaactgccagtcattgctggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcccgcataaagaacaggatctcgccgagcttgccggtcttctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgagctctgcgctgaccccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgtacacttttacgcctgcgttgcgcatgttatccagtttttgctgacaaacttccgtaccgccctgtatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtcaagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacgaccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgactgtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcgatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatNNNNNNNNNNNctactcatcttcaagataagtataaccgtacagtcccgcttc
aaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatNNNNNNNNNNNttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccg
cgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatNNNNNNNNNNNttattcctctttctgtgtgggatgctgtcggccagaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtgggctatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgac
aaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgcagataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttaggctcgtgcggtatttgttataacgttcgtgctcgcttttgagcagttgattaaggttgcgcacaaggctgatcagcttacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgacagcgcatcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagccttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaacccttccggttgagtgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcaccgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgcagattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcacgagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatNNNNNNNNNNNtcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataact
taatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacNNNNNNNNNNNctagatcacgtattcgatcaacgctggttcttgtttacagaagcgacgccagtcgacaatcggcattcgtacctgcggactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggaattcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagctgcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtctcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggcttcgagctgggtttcacccggaccgcgcgaaccaatcccgcctttccgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgctgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcgtaatagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaaNNNNNNNNNNNtcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgacca
tagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatNNNNNNNNNNNtcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggg
gttgagtacttattcttgttcatNNNNNNNNNNNtcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccaggcgcagcgtatgttcggtggactcgcgcatcagcaggccgtaaatccatccgccaaccatggaaccgcctgcgccgacggtattttccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccgcagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgggcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctataagaccggcgactaactctacacggctagtatcaacgataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgcttacgcagaccatatcgaactgacccagccagctcaggaagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccgggaaagttgaagttggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcacattggcaacgttaatgcctttgcccgccgcgtgcagacccggggttttcaccaggttcacNNNNNNNNNNNttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgtttNNNNNNNNNNNttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatNNNNNNNNNNN-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git a/tests/test_data/genomes/G9.fasta b/tests/test_data/genomes/G9.fasta new file mode 100755 index 0000000..d592eb2 --- /dev/null +++ b/tests/test_data/genomes/G9.fasta @@ -0,0 +1,2 @@ +>G9 
+NNNNNNNNNNNatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaaNNNNNNNNNNNatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtg------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------NNNNNNNNNNNgtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgaNNNNNNNNNNNatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgaNNNNNNNNNNNatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgaNNNNNNNNNNNttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagNNNNNNNNNNNgtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgaNNNNNNNNNNNatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataaNNNNNNNNNNNatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataaNNNNNNNNNNNatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagNNNNNNNNNNNatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaaNNNNNNNNNNNatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaa
gcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaaNNNNNNNNNNNatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaaNNNNNNNNNNNatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaaNNNNNNNNNNNctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaaNNNNNNNNNNNatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgc
tcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataaNNNNNNNNNNNatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaaNNNNNNNNNNNgtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagNNNNNNNNNNNatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgaNNNNNNNNNNNatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagNNNNNNNNNNN----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- diff --git 
a/tests/test_data/outputs/db/blast/nucleotide/nucleotide.fasta b/tests/test_data/outputs/db/blast/nucleotide/nucleotide.fasta new file mode 100755 index 0000000..39f119a --- /dev/null +++ b/tests/test_data/outputs/db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>2 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>3 +ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>4 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>5 
+atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>6 +gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>7 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>8 
+atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag +>9 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>10 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>11 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>12 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>13 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>14 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>15 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>16 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>17 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>18 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>19 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaa
agcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgc
tgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa diff --git a/tests/test_data/outputs/db/blast/nucleotide/nucleotide.ndb b/tests/test_data/outputs/db/blast/nucleotide/nucleotide.ndb new file mode 100755 index 0000000000000000000000000000000000000000..a9b411b3621a29a79d75d1c0da6f615196410c93 GIT binary patch literal 20480 zcmeI%K~94}6adgKnn>JWOz&Xac?7Rw6q6Y_2JE}A?R z%pYd>{61E{M5H5cgOBCs;v`+#jMwis?esDtziXqtr?39lyJ@tT_U}A%v@Rq-fB*pk z1PBlyK!5-N0t5~h82ax2=>Lc8L)6ur^`)orte^kfcla-qfdByl1PBlyK!5-N0t5&U zNd0$27L%A4%VJ(oAwYlt0RjXF5FkK+009C7{#M|&NteZWyw$Vn{<+*F&tv!qK1AJP z)BLoXMvU&CR!{YB?@yoGW1Fa{;wo0fO81{=jRXh~AV7cs0RjXF5FkL{2n8`+M{ky7LD7ATZ(!@Cx4FfuDdc!3W?g@FDm;_$l}r zd<6ageg^&sKK}2Op#dfopasq;Kod+UKpV^`KqFjGfL6Gq0L?I`0PS!^0UF|(0<^@< GkH8C-tr(mD literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/nucleotide/nucleotide.nin b/tests/test_data/outputs/db/blast/nucleotide/nucleotide.nin new file mode 100755 index 0000000000000000000000000000000000000000..10e6f3db680b3ce612ca913c6c072fc64983b718 GIT binary patch literal 372 
zcmb`C%L)Nu7=~YGjgV+IO8BFzm!Uhb-5Ih@F&bvCjaS3G zM80)2-@heuy^lzxP*O8CyD}+XB0l#oJb!sUA5YjZv*(e^Q-CbXIH7APY}09RzU`{l z<&}dsG+lKzlYPWM&y4B2FXB0LKEJQ|1(W}4Aaq}3LmU+NR^B|84%m$R^ehzNYTM94 zG{XCN+RD;!;LhCRp|tG}v_)(3ynrGNckk4A$poVw=C_y<1nmYhNie(1AA0F%_oeb6Y`l1D7@4{cnV2kZn#&eJfKB&|a~8kcvs&jP0r| z=}p!#h+f~gLP1ZSyIgg#)B(ZJG|d(Tvvx78wMmO6?+jK+Kcq!nU>SbYn2NvUx`+Ml zDG_mJD1Rzex2k?CwKfCc7N`|rwXXo1m>WrFw8Qu78xNsZ12H< z?=@Uu=q&JA)Z6vs@v8-Pg_k=nH|Lt%w=qRsVrA--!@X5ru5E!QN1w<)XRM|fZ1Wi2 znGHJh<#}tNd@)b+HfLatdK%uTws%b5VpG3te>AhCsXvvuCD_EMTXT6&Dqdd~t>@^( zURad}Ya)+FJHXu1yQ9o{lwiXXSF01wPuyO;$#;K}vUG*=W&$=V#Ur}pv9!&$G7heI z=vzt}%Pn=V|@v?l$-*M18t9 zq>DYU1~WiWn}3wye|T$1p&xS-!CAXg{-&j`y{NEq=k^Xv1l=qYL3r3s2EbO;{(1Jj z&)iV_j-2)I_jrL6vl^rEP@g$lb~L*hGCSlE(JjyikFCp#g~nH2!Rv>20m@wHNv$(~ z?1ameUA7GYRR_s=GVd^ue-goo1R?Z9lqvhFOIagHB&mRru_!WsBRlVXz_!htw>@!J z$P^b5lbU;jJV*?C$3M+ux+S>rL&%}EW`}A6Y*2Y=u3LWA-In-u!Xx>3Sm$ZE&DuPa z8o}I*6+6goC2TYNJ6+S*o9{9o7~DR-N;qTplYvd``l281w@uQDyb_lB!&04Rv9jPd zQ?7IA>wlS-93NJHS@v@6EySgg72BSx`@FmFpq)*yA=+9#g<1KTK1ML*UUAj2A(QT& zU(O^AOr_+vv9B42rA;~Uqv<$rr;!A-*83$S?riY=Y75B2iNARF{dy+xOI^tS!QAq^ zlIscVdrtxUfHwuwqzG@V%I@367sO3ybE(2#zg(h--cly^ETm+)6P&LayO;1xyb=cI z=!VgT3FOhNHTq~nkKjQ%lP_z2{iXUUNk9}&pmoHBDz z%`1(Z>8osb1E2bUcQN@ytkcZHHD6&=^TM!k-OVq!X-5&&hrRmCl(-8rEi-v#Twl%@ z)aq(0eUpHo=e44>0b_FTvAPjt`*q>n&fB>v&1TbD*qM6_1SH-X0$=rQncDj)FShY2 zwQiZF>40T-jra~>Qs)MXOi}_=BjqJHCyTV?I2eb2Xy+cEZ}=4UyBA!fh_cpm<7K9D zZe+=^X67Mne_sXkel(5lccYA*FoOwv;N~$Q`=0mjt$#>0i~E5@uvt5u#7D9=pqlz( zycUZilL<)V1-^0h*t1sI60=eSQ3UOXOQnX{McO8Ul|Eg$Kni8cMHkJv(WeKZdtc3Ya8xKx@z^nq5yKAKu>NZ zj$+A7q1^N{Jg@gl@$B2tLU6`pQ#_t{(-Jnckgv?^heA4di=>^KAKFO9%H7%%i&0mS z&Yj&9-|MGThzkXx8O^ctxfd?GsUCO)r+%KGZni~1RFE$n%==$hV;FWJ;*qMX;Y#Z$ zv#IF$c#%6{viat<${`L@h#JP&`phavW{o&T+0h}k2DFRJr$Ns5!6mwVE#c$Nu(RpL zHqW&nZ>`cce^;36R*N}r$JSWe`dP7?Qa&F9i-;Jjv2omL&RRcRuza>!dtZrQjvS$$ zt=%Kfw2Sp0{URg@7r*oM&P^CVeDFuM)A6=hX(ugJ?o2Q4-R7L9A!(*Dtk)m z@~1^!y;eo}BC$cGIXjpwm}-VuW_{oUBq7qPtn`dy1p31C{!;&4lBKsP8bp^3QjI2n 
z0sBBN$Yylxat1fZlT^7qx>KNUva0BT*P~I54Vlsn_c0`AJdUrn#q5!xcUE~7l<~w9 zJWj*5?^WcuBS$cAOV<`*o~Xc_E&rzwlG5I9s#`Zi?6q zZK1QVb(&@O=~24!8_HXdDC#p#`CPFQCrrVgZzbj-W;Pf^QDxJg`|1`kpT$RDRtklP z{a1oLu{uVcBS8u&likO)Q7f+wH@{A8B_cdk=O8R~V0^?+l>$5*IevYlr4j<5#MCB{ zhV!|6i@*;KHQCclJZwFja)-P_X0o2dd_;BU+d_K=3tWW()MEHdEHHyb(7JsVzg_C^ z3LrtjHb=8ffr9)y=E)}*d?M6WYi@+#l?6!v+fiJ~&1b-M)q=L%;1Y>PxhEXEed6?3 z6brq(-?XNpztoR$JJWfHsYj|LR;$}bFh5Sw7t0$3-P8x*QCi`*Se)WpjcQY)ap7_c zNyo$+rG9=_Ehelg32aR?-w0% zT8qKLj4x6C*vMl67bYt!9)G6X&f1FW;~TMONGkF36Y0Ei*@Cj4bCi1f>f*Nx>0;Xk zdF-mC_ft!)G*<_7asDQQB4ON{*qnuo@d+&xcit!+#s5Ie@g&s4%`A-*-LC@f&L<+{$R6Xpu6K|taaeV_<3)$6p6l}*aCSSv8LrUd28)FnmXnmK?s>1 zT6$&8F0egv8l%w>)wVKLgCb>}HN#%FZLO$~kk5O>ZI|M?POF&fO~ti9q`Mt`l4l$g zYOHy{0p{w9)vE($`ELjE&BZaBlGq(xQ3v2}7?*@%!e z(q-?8<-8M64SHFb%^&$1M8A1~(l$px14)gDgX>fgIo$YH$!_FdBu{D;FEy8!uueX5 zZhk&jwM5aI^;O7NlKZ@(>ofP>C&Cd+;f`rNl-VM$4CLs6f}%N!sbF<_ zN|VaF=^Gu8;(N;pnbHE1x1Qb*-Yl7VM_l3ZRa_sI5k@?E@(T0GRw2GO2goI$=v7H- zYSz~w;MG{=!FKXwSF^lCOgKLJxnN%tFV*#tX(h+}QTNk>P3bF7d6b+5>?t~_rkPyI z{I?HW5CD*1k?MH_WWoxM?Wxr{J+>gP?!7ua)2I2cNW;QeW>_RN zfxiF!%KE5>oiD~2*Zk_iwdY@IwL2>nsCzM}bQ#p5%;V){#0#^V>Qw`}k?D@(OiB)9 z$ticJ8&n0cteaS4IV}n+9x4j1dP$l~Oe9YhUaA_cZ}mM#gXb#GK6 zVlF`u$k6FL`8=wO$l>o8%NomoX3me2zdcz2f8Jl&?ueazl?VwYA;n_aecYIR>^v>? 
zNnvWbo9~uqO#W9wA>S&v%Ooha)+3^+5=cN2!_50@Uhf!$6hgSciR@TXRI)WdCNp4@ za~jM5>pVTCAE_8jGz5c>oN{-$o%MOJV)qw_{?F@5hfAX;sN;m zo_Cq&3Y8SHzq;hY&7%AqB32zcdcO;LHbG?@4Ji6 zXF-N%;IHh9Ac$yO$dDX+raQ?=~!Tv?zx#{h<}|rsARE2E1~ZvB|j?DUn?OM zA^F#rY1d*^!`6+%npyawf8?B+mH;*fJOZ8~PCiC6#EgheSapnSI{Vm=pnq-`&^2v!pFhs&6WR$y41{o8a1s8Xo?Yce% z*ywgLW|%W*OyL60b;d7vs}Bk5dqdp8=95`T(QH}9mF8@$$V-Lg(rl~2opnG~pu-vf8hE{MGW=$}3NB@mwA8vqcG71| z?f2x_+D4izJhB^@>dB9$>R+3J5&96#A28U2idD>oZ1Ca*){(da3|cUtPSCcHHShUG zRu_~}pouk1;NAQ29p_#nMWfp%leM61)uM@HGh00h)JqdBkqo&9%JCs?+E^fTY^51) zvsIkjp7eto#bUni8h;S_K=x}7v#%&~S;*a|(157i@$~o3%aeu<2mL8fKx`(PcsF<& zc{90NKrLEO3N;UMcT$S8pWea$R(3V6%WzBy;4j+Pbga))L-}v|GAdcL!As^I4E|$w zJ)99S9Igh_R4}ah_U4&B$p{t?i%T-uFgglm4U;4Ae<+XP^1q{Ip~1M$vM&~*8;iWm zGU$>i8-Q)WDGLRzwOViJlA?t*cT&`^RD-j*l{()T4?r^FgMeOG@w`LqC{MF|JI0M- z>klW>%QOWRwfeU}ELN$D3L$j{^u7+jMltP`H4kcbOX4$abCe8%Mc-Y|2gXY{Z^1xp z-~F{B+e*cTw^eS>+Q)#0f&v0*zjc{VQk~#o#vy5Y_1!Uks8Tsis4MjKKV*<M1-g zehQqAys!ub1YZ}JZPo~4Z1m|8Tz>QF`D8~PeHkjfJ3n@#mTBR)?OiO1$WC^gxq=J4 z*Dqpx8paS!8ciX*5ZD>**+D}93cC%yE$cb7^Aj!Eg;%O^4kG619m;hU3h_~e1sc@` zf)GH-E45+<_gP_F#PB!y`E=tF9Z)TR;V8aL@bSA4s)M22kc4hX~XwjtV8 zzL_^biVHUzFLjem4Dfq**#_=er373f0V*WW$kikjLll5DSxWkAMDY6o28H807xW)p z_h$KCLCnoCDf72<1bKuF&s97wlX90C{GMW2=S08g&+X^-#*w`dMe5=^lQ!i`jezn^ zUx1}=;ruxsaVgdoBLJ8I#Egl*Aq+(VZHJP>dzo%BjFR++9eWG@F^1yKXezT2B%KYK ziP|T(vDolmG4t-PGI1XvZUbM^g(yCcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/nucleotide/nucleotide.nto b/tests/test_data/outputs/db/blast/nucleotide/nucleotide.nto new file mode 100755 index 0000000000000000000000000000000000000000..91d3a927c3e718edb4b7b0774fed440a3c193069 GIT binary patch literal 84 vcmXBF2?~H9002QdP16qZ|8H%;g<*RbO*XTzvau66NMue5m5ZB)m&V5r8>|4+ literal 0 
HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/protein/protein.fasta b/tests/test_data/outputs/db/blast/protein/protein.fasta new file mode 100755 index 0000000..38c6a95 --- /dev/null +++ b/tests/test_data/outputs/db/blast/protein/protein.fasta @@ -0,0 +1,40 @@ +>0 +mydppflealmitasffaifiiivvsvlllegd* +>1 +mklflttaaltatltsgmgfasdtvipwatnsggtesthiaamgedvnaqhqhiihthegvcaansgtiqadeaaltsnkppvqvqpellphqg* +>2 +maknrsrrlrkkmhidefqelgfsvawrfpegtseeqidktvddfindviepnklafdgsgylaweglicmqeigkcteehqaivrkwlearnleevrtselfdvwwd* +>3 +lklipfyllalfsassgateinackdligtwktpadnppytvtilppveacgekcvklnveyeldvthrnalycherqegvkgqgpmviafegaygghaigtynrqlqllragvipknkkwkwitkmenywfrrvkah* +>4 +mkkfdschpvflligcaqvplrssvskpvqqpsaqkeqlanangidecqslpyvpsdlaknkslsnqiadntasknsaissrifcekykqtkeqaltffqelpqymrskeveeqhmtefkkvllepgsknlsiyqtllaaherlqal* +>5 +mrikpddnwrwyydeehdrmmldlangmlfrsrfsrkmltpdafcptgfcvddaalyfsfeekcrdfeltkeqraelvlnalvairylkpqmpkswhfvahgemwtpgtgdaasvwlsdtaeqvnllvvepgenaalcllaqpgvviagrtmqlgdaikimndrlkpqvhchsfsleqav* +>6 +vlaftlrfiknkryfailagalviiagldsqhacsgnglpqingkalaalakqhpvvvlfrhaercdrsdntclpdstgitvngaqdaralgkafsadiqnynlyssntvraiqsatwfsagrsrravkkmmdcgsgiyssintllknsqiknivilthnhcltyivknkrgvkfdpeylnalvmyaengkllldgefvpg* +>7 +mgsnyivieglegagkttardvvvetleqlgirnmiftrepggtqlaeklrslvldirsvgdevitdkaevlmfyaarvqlvetvikpalaqgvwvigdrhdlstqayqgggrgidqtmlatlrdavlgdfrpdltlyldvtpevglkrarargdldrieqesfdffnrtrarylelaaqdsrirtidatqpldavmrdiratvtkwvqeqaa* +>8 +mkhikksvlvvlltshvahasivvggtrlvfdgnndessinvenkdskanlvqswlsvadpqvtnkqvfiitpplfrldagqknsirvirsgaplpadresmywlnikgipsiddnasanrveisintqikliyrppaltkstpdsqsqqlkwqtagdvitvnnptpyymnfasvtlnshevksatfvppkssasfklsstaaphgtvtwrlisdygmslephsgsf* +>9 +mrillsnddgvhapgiqtlakalrefadvqvvapdrnrsgasnsltlesslrtftfdngdiavqmgtptdcvylgvnalmrprpdivvsginagpnlgddviysgtvaaamegrhlgfpalavslngyqhydtaaavtcallrglsreplrtgrilnvnvpdlplaqvkgirvtrcgsrhpadkvipqedprgntlywigppgdkydagpdtdfaavdegyvsvtplhvdltahsahdvvsdwldsvgvgtqw* +>10 
+vnlvktpglhaagkginvanvlkdlgidvtvggflgkdnqdgfqqlfselgianrfqvvqgrtrinvkltekdgevtnfnfpgfdvtpadwerfvndflswlgqfdmvcvsgslpagvspeaftdwmtrlrsqcpciivdtsrvelvagliaapwlvkpnrreleiwagrklpemkdvidaapalreqgiahvvislgaegalrvnasgewiakppavdventvgaggsmvggwiygllmrestehtlrlatavaalavsqsnvgitdrpqlaammarvdlqpfn* +>11 +mnknkystpllmlatilagmlspmqsavngqlghwlqdgnacavisfasglvvmffiiiarketrqqfasiptlikkrkiplwnwfaglcgamvvfsegasasalgvatfqtalisalllsgllcdrfgigveekkyftpwritgalfaviatifvvspqwhstsfillailpflagllagwqpagnakvaeatgsmlvsitwnfivgfcvlgaalairialghvtiqlpdtwwmylggplgllsiglmailvrglgllmlgvastagqllgsvlidelipslgntvylvtiigtlfalvgaivttipeyraskmaqkmevse* +>12 +mkgrwakyvatgvmlamlaacsskptdrgqqykdgkftqpfslvnqpdavgapinagdfaeqvdqirsasprlytnqsnvynavqnwlrsggdtrtmrqfgidawqmegtdnygnvqftgyytpvvqarhtrqgafqypiysmppkrgrlpsraqiyagalsdkyilawsnslmdnfimdvqgsgyidfgdgsplnffsyagkngwpyrsigkvlidrgevkkedmsmqairewgekhseaevrelleqnpsfvffkpqsfapvkgasavpligrasvasdrsiippgttllaevplldnngkfsgqyelrlmvaldvggaikgqhfdiyqgigpdaghragwynhygrvwvlksapgagnvfsg* +>13 +lfdrydageqavlvhiyfsqdkdmedlqefeslvyyagveamqvitgsrkaphpkyfvgegkaveiaeavkatgaavvlfdhalspaqernlerlcecrvidrtglildifaqrarthegklqvelaqlrhlatrlvrgwthlerrkggigsrgpgetqleadrrllrnrivqiqsrlekvekqreqgrqsrikadvptvslvgytnagkstlfnqitearvyaadqlfatldptlrridvadvgetvladtvgfirhlpydlvaafkatlqetrqatqllhvvdaadvrvqenieavntvleeidahefptlmvmnkidmlddfepridrdeenkpirvwlsaqsgvgipqlfqalterlsgevaqhtlrllpqegrlrsrfyqlqaiekewmeedgsvspqvrmpivdwrrfckqepalieyvi* +>14 +vaqrilvlgasgyigqhlvfalsqqghqvraaarrverlekhrlanvschkvdlhwpenlpallrdidtvyylvhgmgeggdfiaherqaalnvrdalrqtpvkqliflsslqapaheqsdhlrarqltadtlrdagvpvtelragiivgagsaafevmrdmvynlpiltpprwvrsrttpialenllyylvgllehpahehrileaagpqvlsyqqqferfmavsgkrrplipvpfptrwisvwflnvitsvppttakaliqglrhdlladdaalkklipqtlitfddavrrtlkeeeklvnssdwgydalafarwrpeygyfpkqagftaqtpaslsalwqvvnrlggkegyffgnilwqtraamdrlvghklakgrpshtllkpgdtvdswkviivepekqltllfgmkapglgrlsftlhdkgryreidvrawwhphgmpgliywllmipahlfifrgmarriarlaeqitek* +>15 
+mnkfarhflplslrvrfllatagvvlvlslaygivalvgysvsfdkttfrllrgesnlfytlakwennkicvelpenldmqsptmtviydetgkllwtqrnipwlikstqpeglktngfheietnvdatstllsedhsaqeklkevreddddaemthsvavniypattrmpqltivvvdtipielkgsymvwswfvyvlaanlllvipllwiaawwslrpidalsrevreledhhremlnpettrklislvrnlnqllkseherynkyrtsltdlthslktplavlqstlrslrnekmsvskaepvmleqisrisqqigyylhrasmrgsgvllsrelhpvaplldnlisalnkvylrkgvnismdispeisfvgeqndfvevmgnvldnackyclefveisarqtddhlhifveddgpgiahskrslvfdrgqradtlrpgqgvglavareiteqyagqiiasdsllggarmevvsgrqhptqkee* +>16 +mtiqkrlleaveqkllrpidaqfaltvagnddpavtlaaallshdageghvclplsrltlteeahpllvawisetatpidwkkrllasaavscgdspaplilcgdrlylnrmwcnertvarffnevnqaiavdedqlsrildalfpptdevnwqkvaaavaltrrisvisggpgtgktttvakllaaliqmadgercrirlaaptgkaaarlteslgaalrqlpltdaqkkripedastlhrllgaqpgsqrlrhhagnplhldvlvvdeasmidlpmmsrlidalpphgrviflgdrdqlasveagavlgdicayvnagftaerarqlsrltgsaipagagtqaaslrdslcllqksyrfgsdsgigklaaaincgdrsaiqavfqqgfsdiekrtlqssddyagmldealagygrylrllhekaapeailqafneyqllcalregpfgvrglndrieqamvqqrkiqrhphsrwyegrpvmiarndsalglfngdigialdrgqglrvwfvmpdgtiksvqpsrlpehdttwamtvhksqgsefdhaalilpsqrspvvtrelvytavtrarrrlslyaderilagaivtrterrsglatlfdevsrig* +>17 +mqevamssqeaskmlrtyniawwgnnyydvnelghisvcpdpdvpearvdlaklvkareaqgqrlpalfcfpqilqhrlrsinaafkraresygyngdyflvypikvnqhrrvieslihsgeplgleagskaelmavlahagmtrsvivcngykdreyirlaligekmghkvylviekmseiaivleeaerlnvvprlgvrarlasqgsgkwqssggekskfglaatqvlqlvetlrdagrldslqllhfhlgsqmanirdiatgvresarfyvelhklgvniqcfdvggglgvdyegtrsqsdcsvnyglneyanniiwaigdaceehglphptvitesgravtahhtvlvsniigverneytdptapaedapralqnlwetwqemhkpgtrrslrewlhdsqmdlhdihigyssgafslqerawaeqlylsmchevqkqldpqnrahrpiidelqermadkmyvnfslfqsmpdawgidqlfpvlplegldqvperravllditcdsdgaidhyidgdgiattmpmpeydpenppmlgffmvgayqeilgnmhnlfgdteavdvfvfpdgsvevelsdegdtvadmlqyvqldpktllthfrdqvkqtdlddalqqqfleefeaglygytylede* +>18 
+mnslpqrstdfelttsqdgfalswqqrlilrhsaenpclwigagvadidmfrgnfsikdklnekialteatvselpdgwlvqfsrgatisatlrisadeagrltldlqnddlhhnriwlrlaanpddhiygcgeqfsyfdlrgkpfplwtseqgvgrnktsyvtwqadckenaggdyyltffpqptfvstqkyychvdnscymnfdfsapeyhelalwedkttlrfecadtyiallekltallgrqpelpdwvydgvtlgiqggtevcqqkldnmrnagvkvygiwaqdwsgirmtsfgkrvmwnwkwnsdnypqldsrikqwkeegvqflsyinpyvasdkdlcaeaarhgylakdatggdylvefgefyggvvdltnpeaydwfkdvikknmialgcsgwmadfgeylptdtylhngvsaelmhnawpalwakcnyealqktgklgeilffmragytgsqkystmmwagdqnvdwslddglasvvpaalslamtghglhhsdiggyttlfdmkrskelllrwcdfsaftpmmrthegnrpsndwqfdgdaetiahfarmttvfttlkpylkqavaqnaatglpvmrplflhyendaatytlkyqyllgqdllvapvheqgrcdwtlylpedhwvniwtgeahhggeisvdapigkppvfyraksewallfaslrni* +>19 +mkhlrvvacmimlalagcdnndktapttkseapavaqpspaqdpsqlqklaqqsqgkaltlldaseaqldgaatlvltfsipldpeqdfsrvvhvvdkksgsvdgawelapnlkelrlrhlepervlvvtvdpavkalnnatfgksyektittrdvqpsvgfasrgsllpgkiaeglpvmalnvnhvdvnffrvkpgslasfvsqweyrsslsnwesdnllkmadlvytgrfdlnparntreklllplsdikplqqagvyvavmnqaghynysnaatlftlsdigvsahryhsrldiftqslengaaqsgieivllndkgqtlaqatsdaqghvqleadkaaalllarkegqttlldltlpaldlsefnvagapgyskqffmfgprdlyrpgetvilngllrdsdgktlpdqpvklevvkpdgqvmrtvvsqpenglyrlnypldinaptglwhvrantgdnllrswdfhvedfmpermalnltaqktplapadevkfsvvgyylygapangntlqgqlflrplrdavaalpgfqfgniaeenlsrsldevqltldkggrgevsaasqwqeahsplqvilqasllesggrpvtrrveqaiwpadtlpgirpqfaakavydyrtdttvnqpivdedsnaafdivyanaqgekkavsglqvrlirerrdyywnwsesegwqsqfdqkdlvegeqtldlnadetgkvsfpvewgayrlevkapnetvssvrfwagyswqdnsdgsgaarpdrvtlkldkanyrpgdtmklhiaapvagkgyamvessdgplwwqaidvpaqgleltipvdktwnrhdlylstlvvrpgdksrsatpkravgllhlplgddnrrldlalespakmrpnqpltvrvkasvkhgempkqinvlvsavdsgvlnitdyatpdpwqaffgqkrygadiydiygqviegqgrlaalrfggdgddltrggkppvnhaniiaqqaqpitlneqgegvvtlpigdfngelrvmaqawtaddfgrgeskvvvaapviaelnmprflaggdvsrlvldvtnltdrpqtlnialaasgllellsqqpqpvnlapgvrttlfvpvralegfgegeiqatisglnlpgetlgaqhkqwqigvrpawpaqtvnsgialapgeswhvpeqhlanvspatlqgqlllsgkpplnlaryirelkaypygcleqttsglfpalytnaaqlqslgitgdsdekrraavdigisrilqmqrdnggfalwdengaeepwltayamdflirageqgysvppeainrgnerllrylqdpgtmlirysdntqastfaaqayaalvlarqqk
aplgalreiwerrsqaasglplmqlgialntmgdarrgeeaitlalntprqderqwiadygsslrdnalmlslleennlrpdaqnallsslseqafgqrwlstqennalflaahsrqasagawqaqtsleaqplsgdkaltrnldadqlaalevtntgsqplwlrldssgypssapepasnvlqierqilgtdgqrkslsslrsgelvlvwltvvadrnvpdalvvdllpaglelenqnladssaslpesgsevqnllnqmqqadiqymefrddrfvaavvvnegqpvtlvylaravtpgtyqlaqpqvesmyapqwratgasegllivtp* diff --git a/tests/test_data/outputs/db/blast/protein/protein.pdb b/tests/test_data/outputs/db/blast/protein/protein.pdb new file mode 100755 index 0000000000000000000000000000000000000000..d8c528067647b6400a5fdf8d0313c7a2cad70be2 GIT binary patch literal 20480 zcmeI%K}y3w6adg~6q;Q`dIxdm5xk0v77Ch(i0fXLc8LzHSR`qJ}w(a(SGJN_5SK!5-N0t5&UAV7cs0RjXF z=U(BT*bOt>;5yXkpKY#1PBlyK!5-N0t5)0q`;)BI<@{! w&&|thQOEv|fUf`FR2%*ONxoY}6Cgl<009C72oNAZfB=E9K)0X?8GE&u=k literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/protein/protein.phr b/tests/test_data/outputs/db/blast/protein/protein.phr new file mode 100755 index 0000000000000000000000000000000000000000..5c45aa957a4f0d1cd7818e3a55a2b3d1e2230b5f GIT binary patch literal 1290 zcmajdF%E(-07l`qf(U}5Zl0n=5M50d6B83J(5v()=;$fcK!PObV=7I)*X7q<`^Imj z7UKRsKHJ~;baB_4b!YpzZoHD>`+M{ky7LD7ATZ(!@Cx4FfuDdc!3W?g@FDm;_$l}r zd<6ageg^&sKK}2Op#dfopasq;Kod+UKpV^`KqFjGfL6Gq0L?I`0PS!^0UF|(0<^@< GkH8C-tr(mD literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/protein/protein.pin b/tests/test_data/outputs/db/blast/protein/protein.pin new file mode 100755 index 0000000000000000000000000000000000000000..d82020df7a61c1140cdf102496cf9ef7e1c9a30b GIT binary patch literal 280 zcmXw!%L)Nu7>3_5PLU+aO8rSznh}z$MixqmJJ5`wCS%BJ@#8meLIv$;B%j^};L`CDbcq)uh{S5PLV!7$le zIHH^rg?ynTM5S6PidyU25lF04t^@c-KTo6GlB+vf(r6h=j6EP4*^(O|Ga=W>j@$vS zpESsxJh*-cZj%fxHFz=H@Nb;3E+45IQrF@9HV|-t-~|Y!dBhWloPp>Gh`qV$ikC2- GI(z`nvn#>? 
literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/protein/protein.pjs b/tests/test_data/outputs/db/blast/protein/protein.pjs new file mode 100755 index 0000000..fd87ea1 --- /dev/null +++ b/tests/test_data/outputs/db/blast/protein/protein.pjs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "protein", + "dbtype": "Protein", + "db-version": 5, + "description": "locidex_db/blast/protein/protein.fasta", + "number-of-letters": 7547, + "number-of-sequences": 20, + "last-updated": "2024-06-06T08:43:00", + "number-of-volumes": 1, + "bytes-total": 46334, + "bytes-to-cache": 7848, + "files": [ + "protein.pdb", + "protein.phr", + "protein.pin", + "protein.pot", + "protein.psq", + "protein.ptf", + "protein.pto" + ] +} diff --git a/tests/test_data/outputs/db/blast/protein/protein.pot b/tests/test_data/outputs/db/blast/protein/protein.pot new file mode 100755 index 0000000000000000000000000000000000000000..754c9ab7fa1e8f21ae1d5ba10426f2e2f8bdb1c3 GIT binary patch literal 248 zcmb`&w*i14001yMq5>BFzm6_3`Gk)z<}97Nd5mm+Q*q0jk>Fn7qGAI;{4U;dyg8N*1X+{(dC$PK4%?O z4dVa()uK`Nc22k3x|dXmZuxz3ziziBvcuXbJ69$6)7(fS<(zg3az5vH#m>X|U{6@B z^nS(}J*f6Hgy*}`=9*JyDK%vu;s`2n9d~E!LG}t^WMB62KIZ064!sq1(aVyj?gr8P z)e*9kZ-_%6E#k6JAh%cACxU`%;*L8x|ST>&Y$#aZe;Up7YuqI#j0<%2Zk@x7@r-4ei}H z=P52(WlQwyOZ`OU(?2&(*SXeNg&K{%+$dqFYpw}(`Nq=zPG4RrSy{T;$efnGm9|`7 zYv%Zc^2*q}k7`FNWgoB3Rl=c+sqPRi~%E{D>}+;u1~&L!98r#CT@5)o$`8+;`3;) z>07JqGhH6b?aR-_pz99Mt^Na$nOEdSNomR8H2GxRsijZ%%g0U!WTNlk{Yq1x3!I2q z^Ah6eHV?8IFU`OjL`h-vrstqxb61r-Tg`D#$|M_bgaTwJw9j)L@!{m{iwj3i&m2Zr(Z|^4 z=4Vd>#T*{!&7Hm5qbm=oljAsMsN6H?kTbBSTne;8$>D(n3fzKdxW&}NFt_G|)$W8= z{3@>L84G9}=jrxtWgMt!_fE(73>j27&%?FFb!BX4SoK|Pxk&CSgJHK^niy!vFKV5VNb%MH`k1O5+{kYvX7hAUNOVF zNMGc1{7LSh#?6)Eg4^6)&&vC@n2%9E)#BE9We6 zjPU?ZiO>+zaN0nncvQFP0mn#;9}tFHed^oIdi5G}*DlBr@f)F;gF<M2xZ^v$Yd9|h0SwZ*u2X=!r% zn^Xuv&YT{)DZ$rOaWFGnd`DUVDhIVtf9orr7?D1p(HUVnjbF$@fv73^l{0g@P0jUP=^>O^5ZJlq&nbG_@qFO9f%zXC@16#Zm>Im&6(KxM1{H=!MxejFMLqAhoLkh 
zVbNh5ra8x=gYk7kVK`?BG3D;WTV@f?y{2Fj(7yohPpGcVd3u&{dsuAMJ~ae={_o_@Zf31BNJ>2 z%nl-FeAq~rLkYKr0C;Rn&#WDSyK}~`i~xP*@J@z)sUDNYOd8tgGP+8&$B@LEkOhn< zp9HKRtafmo1#U{WhX#>SM4!3zV-6E0KQIpwQpDymuMCSd`>Y8s#UkQ*9^{1_{1xJP#Su2@4{ z{x%DG_n3`Phq_=6NnToZlbAvdy8-si?H?47L@|Ts9~bIQP|#`lb+MA~pO1Pe>Pi8)OlNG&s~FYs0cnP+tp}b(`xsrwqA_!21>pD) zQ8Jxwj7VA>5=(P*a5{|eY@t+3P%d=jf!e_Vj6jCK6{cHt!o^rnK1~S#SVxHRCzP!q zLC_2LSk-521tJU?41C7{zfdACB`Us8@#_2*&~Fd=69P&ppd{vH74zfJrod%35S@ew zwlN6we1`#F4itU)g$9b72phl4f+euRE9l)92p zGs;VD;YNUs0PGTXB0c97NbHpM;ugRPW5~>lhkltMOalNWh0J=t7ADW0xcY!-I;^M< z5F@3-X=`M*2o~)QTmTQ@AV72BQ4o1T99ZMs9F!yFQ62Veegp#kPCzV1Je z-5xzJ6Lt5P0op<#EbFZS>!rjc@tLR<_J_5>su|l_EB7;)bmj%kz)C`?c&vr^$S=3# z&pm9Ljc@o7l2ruuVWwaq@(vR{sXEDWl=Uz~^Z+qogq_RH3c`yKI@~D>2oLW<7XzN~ z^b96vP2vQZ1SSYT;AU9Q3=dK_m|8L2Gf0nOZik#oSi+c%fma(C?X<|$Fgw6xu$woh zh8AJ{!4268oG{l+N$sAU31NvH0N#QOzV~&^YAOpW5llrXHLhdKg)1QqqQh8Fi)6^* z(#O7q=5cRq)I?vfH?Rfzi;J4sV&*_^>RN*WljkQpi_9<*!-)N-Va*dQ{<`nzr)i~a z9dOZrm>fx?P>lFR615H*i2ES4M@x!fL8RR3MGn<-0Z|F%$siz;lL#avTU3D&#Tr5g zG#u{^p3X;xDFnG^;KAB3%P_1tLwZpcW+I!&(q35qVB)B>QKVHxh;r<6c+V=4%c9b- zf~DV|CXpFYEZP={Obe8@jNX}vFg z7E3^YTH~?y=a&Z;KH*3)q|{*7UXeex60E+k=rIy<*ban@F|P>$LDyg-e{i5+laZrZ zi%^K%IXupI>=}G72*gg2vBV@m&uD|1degE<#}!;`HU{1}4tLx^FY%If(Z*D=RV3Xp zEC73=F`VT?A(&h~0waYaa>HCatGG}Y7*2$*L(RMp?*@A*mU)me{pSA+D|(Buqu`#p zCLAIzA6zunTNqhkvB#TU^r&RPP#}N{*LY;MvezTwv$gyR%P5=>>cKj#kIp&(bu5ns z;PGvJwlDB3f~a4@u+m%8URF`zgZ&@p#BgDaptxyU&+hMe><2d4m-WjI=Npi`ZFv`Xn zDKOR?rCXx$urbvkv#?mQB_qRtyup_2_Ba_XZG&I-V?7LR%u_8xuoGjRc*O-hYjV^3AD&E{3*TmmX%#`vDQiulIBC! 
zEV^yh)9f43yb=l%unhVnu*jlIO5UfE`{o zrXiVHh{tD9YIHS`^-0C}1NwtHnDWE-m^zo4DZC+ukv$O$Hj?u5xLSnXwid*@;H}i; zbV4fMsUT~-1`#L8DcgZLp(I#g1-3p2AJZ}z{}UiJBfZ9D7*wJF-(g(HoZJVBdZUnh z!!RSm`k-4V%)Uo~GgwVRN-ztK>u<~^JY`$|l%UWk$K0_G8NoPGzz ztb{Cep;G|;*xL)G!G7WhEZ7mFZF$KGj>8f8n3%%ka#vifeIK$;v9ATlUu*%9Pt?YI zdSjLLc#Q9SDDewpOlR0$@E=`gOU`sd>JTnm$0y)T$!bh!9Hb3=ib@d_96v6WlGHlcj#x<0I@s zJb`^Xt=HmSN4^L*2qzAb*|F6IM8gQOd&J^}F^iN{ho%d@iged^f-sIQH!bK;D3rFAgX;#*Kv}km61_n9Pb6qx36F zSNpn#JGEWm@JVfXEKI-;`W#r(LG)vHZu@T{6>CA85b`Nw22=18+b^KTj*d_EC7UG+ zKcyi|6$=Tzcws|F@!%Z#OND?fttZ#oMu+3s2HB(uxI(BNL-jgK{La3Ft)Gs}A+R?a z?wIyn8YjVcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/blast/protein/protein.pto b/tests/test_data/outputs/db/blast/protein/protein.pto new file mode 100755 index 0000000000000000000000000000000000000000..91d3a927c3e718edb4b7b0774fed440a3c193069 GIT binary patch literal 84 vcmXBF2?~H9002QdP16qZ|8H%;g<*RbO*XTzvau66NMue5m5ZB)m&V5r8>|4+ literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/db/config.json b/tests/test_data/outputs/db/config.json new file mode 100755 index 0000000..33bc44e --- /dev/null +++ b/tests/test_data/outputs/db/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/tests/test_data/outputs/db/meta.json b/tests/test_data/outputs/db/meta.json new file mode 100755 index 0000000..b0c01b1 --- /dev/null +++ b/tests/test_data/outputs/db/meta.json @@ -0,0 
+1,455 @@ +{ + "info": { + "num_seqs": 20, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 71.4, + "dna_max_len": 3454.5, + "dna_min_ident": 80, + "aa_min_len": 23.8, + "aa_max_len": 1151.5, + "aa_min_ident": 64 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + 
"locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + 
"aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 
0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": 
"ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + 
}, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/db/results.json b/tests/test_data/outputs/db/results.json 
new file mode 100755 index 0000000..509bbcc --- /dev/null +++ b/tests/test_data/outputs/db/results.json @@ -0,0 +1,15 @@ +{ + "analysis_start_time": "2024-06-06 08:43:01", + "parameters": { + "input_file": "locidex.loci.txt", + "outdir": "locidex_db", + "name": "test", + "author": "James", + "date": "", + "db_ver": "1.0.0", + "db_desc": "desc", + "force": false + }, + "result_file": "locidex_db", + "analysis_end_time": "2024-06-06 08:43:03" +} \ No newline at end of file diff --git a/tests/test_data/outputs/extract/G1/blast/hsps.txt b/tests/test_data/outputs/extract/G1/blast/hsps.txt new file mode 100755 index 0000000..099a33c --- /dev/null +++ b/tests/test_data/outputs/extract/G1/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 12 113 102 0 100.000 100 100 plus 1.84e-51 189 +1 0 285 22872 1 285 13891 14175 285 17 94.035 100 100 plus 2.36e-124 433 +2 0 327 22872 1 327 19133 19459 327 0 100.000 100 100 plus 5.47e-176 604 +3 0 417 22872 1 417 19471 19887 417 11 97.362 100 100 plus 0.0 710 +4 0 444 22872 1 444 19899 20342 444 15 96.622 100 100 plus 0.0 737 +5 0 543 22872 1 543 20354 20896 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 22872 1 606 20908 21513 606 15 97.525 100 100 plus 0.0 1037 +7 0 642 22872 1 642 21525 22166 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 22872 1 684 22178 22861 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 22872 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 22872 1 858 898 1755 858 19 97.786 100 100 plus 0.0 1480 +11 0 972 22872 1 972 1767 2738 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 22872 1 1098 2750 3847 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 22872 1 1281 3859 5139 1281 11 99.141 100 100 plus 0.0 2305 +14 0 1434 22872 1 1434 5151 6584 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 22872 1 1464 6596 8059 1464 15 98.975 100 100 plus 0.0 2621 +16 0 1836 22872 1 1836 8071 9906 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 22872 1 1914 9918 11831 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 22872 1 
2037 11843 13879 2037 16 99.215 100 100 plus 0.0 3674 +19 0 4935 22872 1 4935 14187 19121 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta new file mode 100755 index 0000000..3e6b682 --- /dev/null +++ b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 +nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattg
atttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagca
tcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcgg
cgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaag
ccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgt
ctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatac
tgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggt
catgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtg
gaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgt
accagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgaca
atattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaannnnnnnnnnnatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggag
acccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%q2TBXlwpjBgjp0B0cAOW#DSVN05MQZ GYy$wztRBPw literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..a6073cf --- /dev/null +++ 
b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G1/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:08:00", + "number-of-volumes": 1, + "bytes-total": 42908, + "bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..df1c2f87197cce6bcf0fa72284f127d4470647e7 GIT binary patch literal 5808 zcmWModpMJe1OD8`M0Mmg&QA)Ht{m4Pm*b>12SrCVCv9ijx6QEQsU!O&ccqxMY4&Ye ziax2Ka)}+e)S=TkNvUL%=Dtx%(s7Gl@AJIxAMf+L|Gw`FC4eqfM{p<*9TDwXC}SP33&W zC-xtwtd8S-N3FOHO{c;1iR~XnS7RnRtLomMRPRZi=AUS_+CpMeGFL8&j9|7my%c1* zW8^M=56d;gxKSO;@hWC#krGjOId#2Bz;J{Le?8a)4D+e&wQ0$bUT?j?pVqeJBzLJ#l$RqARDZ8vnp-bX-Rf-HdmP%23%gElj|e7Et%5$BCJyud*9VCwQh7b#z;HIYVn7 z(rPW=8N(8Y=e?Jh3o9({D11JG{4+xeQIJ-mOwt=+?{m{PdhqC;Yy43pvHw&cr$d#H z<(g^WeEaDyr&1=zP2W1mC+DNb?utH6h5WN=`mZf=+@jz?@@fPd8Vp;T>+u5vGJ+$5 zlqcMihJ1muz{^s#Uz?x^cY|78_f>_2(y@%H)eS*3bZN8sNR7KAZQ)_Au?dCBD&&m=KTCtcSR% z&jZZOJ=l1o`*~&lW)5jDr?v1*Pyvg4anolOePX z)Y+_`#MrC})vQ{emvHay+jgSy8q5wrA{e(BMOFFkjYlaoVK_1FCyNhAR%a{Z94O!L zsCwr}QX8*ve|e(>>uVUXf1#&yd(k!Kc$LR^x%|O?Z{^*aEdVS+NF9HEVh#VeC%i4U zWmME8kUW(C^mymII? 
zs#jZ5R6D^YHR<8|^>Q0mxVMykV?`lg?AdiAXw(P+5h#T}imdyrx@C&eP811vab(Td zZhsb7{e)$Q@0q};f3fZw^z*^;22|$%Aq8xwmA=T?cq*hBUqN?rx8)@`<#8v4iv9_S z&tUU5Ee|KTZp_;(yWvZg#)prsZ&C$#xXSW2tB7@!tzKJ=^g_Ya%e^!-GG%ngagC6k zmw$I2*@56)WSmJ6!BCo#MOt&aObD{o-pxB_z{d6t5xUk#FH8#+Pm7mA+? z=b@)OSXh-Mp{yt)^Xn1#X(EV`(YJ_NbT|9I3VA+;kZ2=8(a*Rr5kFg*!+`S$@ucCh zSvbyZkR`Z8FIlL@iNo49#vXhaOa!6kd$1`z^rBj4_H`08a0;?N0$U2>|0@%bM1-Do z-S&c2)S7n;UXGFyN28k4`^;oT14C^et-~#1Lc&2B7PlUcoPP7SW_D9?Z?*! zg8XL$7#Q?6A5v7bLu4mlo?aTOCU>Jg?xiqfCco z{+22$zVF->0d3PSn$dRq&*s^+fTdMhn2yTSdP{LcaO%i!xqCqZ@IUQ!)>&Ru9+PMG zKe?p6r<)s$DHnxwges<~rt<~)?U+}tHEP+tI%}Ee;7aPh;l~~-Jcy9`GUnHQ1Om$% z#GA9+o`E`-jG1|uCYfo;49(&};A8+6OXfHSbY95)JvtU+vI5XAN!A2AIheOyff;uB z^?b)nHV@J*+*Ll0w`28q3c-jc;2qYVAJCw_FiI&}{<|aP4S2$NPdB#xhN^p~y64$} z!ad8Ze1Njrztfu^4{c<;Gg^lByg&Q3lD>sf=r8miB=f8eS9Bh-N6kYYM!|Ym$(+v+b=*sDZ<6!)GAn0^#n=c*2=;FEX=HM|6KXmt1fVI8J-|t3)eK= zuvu>&p6|qKwzmX6OG0TU8FE!sgWPKoM*RRTRr;gEJ@Um%g%ZKdZ>_n^BUuHz?c(4g zVz|sr1%Z%wSpKuI$;}l1LJXL)X6AtULK@7AbF6tx{D7&FuxdS0an#K6SOHwr1fnt# z{LM%E<`z`-joszCn4RkhJI1^&MPx<%h@05|m+ScSjUfH+DfO1JYp###E>fec&9lJw zg^K}@3Jaq7)z~2t&!Z{`P+&Y89wa|6(xG?I_AEY zoyoZYp>On~%N8^>5N*6-I0ieK@XmWq+g$bdiwqvsoVBa^dB+^|oNVx3oaz4m5%gM2st6^WH$416g;#Izc&e$Ck<(mY5CnG6MfxG-;VU1^8texZ6$ zSk2-Fxb1kWqYypCF?!Mb#kvZ)Tcz+-$)l1(qEiP{$FHf#PY!mt4IzHw@d*QAZKp7r zU1Yv%3XItgn|9rvKB+s$QeZo>zblU>9+4SL5Mfz>ry?U(aCYo|>;)o&A<#X=W-g!``^YLBg;%O;=+-*%7T3Okb4HE2Nm8&A+(!Vd0ydh1o%D2 z&TUXh&fKnbnLBMT@=yO_WcHiRjqE_`s(#LQ=Ri9sm)?>th81oA$F97V`b9`dAuKFL$*! 
z_`uHDacioP?6{Xd))|EZV`Thl7{fPP0L~US;ytsGr zRJY#G3B^tAxkHR|th2$TLt+EEm4wng{oAT|?-NO4qvsjp6vr-`d)hvsfNhuugEc0B zyqRASqoL^N;O?f0xYakJ)vUY))Byk;s8 z)671x2 z3TG&!pOwed_dA?^QJ8D~wiBZ=?|v$fX!s~rXZ3hBuAy{(Ea|M}cB2=cnR|^mSpg3O4Y3$v!_(O= zBmKV|D}1R9<+ML3Ot^?Ij0rtbbw*dhw$cv%d9lJGNu?$W_UA)+FaP2g}25msS!A$H7Sd{PxW<>1v*vL z`5?Dcy|SDn{U08Pl3G`eJSrGS?4!kW{xAT!rem~YGps;(*L6QTPMT^|yCM&wA(mp0 zkTVmgt}H+!aE}dqYire(U~c~#%zP!}*?9irdl`JO(&WdGw@- z!UbYBz_?hb$3d%i-_qN1WgRa*Cc(j>40XnZqnabCyHWJq${Aq;9B3|nusCsk%KwnU zARWh)cryZ1SbSig4Ca=fQ|;|rX7y~YLw=b=@jJPO{Pb5+;siyeL>8g6kbxk-=&inAR;AtZC25kI+U8m|c_XRP` zD2>y-FZBS!YNFsZ)!!8kgyCNRh}f8wajIaoc%c#n9xH`h!E}=1~V2*?tUq zv7!aizr0IV6vz&Q=vB=Z_RS6U-G*gZZJI2xF@X|}y_?OAvo#w4@y-H`4y(E0nudGO zs{K*Lq707P>R9nE!d?gkBq;I0v)eavKcLI>L?=8E#wKOH%Dy{3Zd|?5G$L;9C6ekA zBs$?2!eIN7s8}z61%nD#mS@z@Np^xnXIAZE>6Dkw4d&J(m8cR7ujo}c821~Y;86qp zYVk)>g_oE+ah$vDrJO3;+w!QuC8GHDn&?Dj7g2UwBgNZdi31>4Y&?aH+2vH(dq$Kp z=2xL?(WTzxMPLwvYrc%i6c%H8w!`K6&vDD|1?Qw-$9rL#6qMc#Il_#U&C+mao=2 z;8~d?pgW9T#oi$|14F0WczCDsy*r;|_fZOKXJj#)dao*cOdj+)7KY`RxV%osi9lLS zHuW}qLJ=3p5{s{MM`tjnJWUUJ2V}-K4sve+xJ|>^FUmd@eNlu-zcP1hO4)j#|GrKC z@XE*GmW2`{ePq=NUukh|%24KCC&(CRe-~{V>QvNxO97!_y#?EbOnl}VrLE@5{Y2v3 zjNd!gdM3f+U&l*Gl~#6ICHyTZ1G;Lh{Q~ExV=iGEdqfd`J55{%W>hw)zv?m<68~o& z2(K)L8E!mx6R6s5<~?e^YKS~hd{?@p_t@NoyJ7Pb6z@XT0k&tTJ2Je1+38-vE%b%^ zZT#mMIxCOf&%a-LSXkK{(R}2{tC)N(wn-qwIAFks4$_S3M$gyZO^ct2IdDvD+y1;; z>i}kS1JMomf#!Az?Y~Oaao%WZU;@vk&2QUu$S90Mw&fY{PYjb+fIUeIW@WdsKMGwB za5l>htG0|e%<28Uu%7(UBF`CO$me(l7Ygfm$)5D}b(g3mT~BO%KUsk%BsM*AB1Z~Nizj?_o}f%*TD02z^&y{2R6nvm#&=mwR;(PstbEkc zD*Y<{54pjq^;J$S9o-lfn=iL|O|CdvJ%TTjhk z4Ig<@;~`cCdnTJX)e&xN2Zf^nkuvdq3@=88_z%Wfag`-P`md5 z)^^@p=Pl?RFwU$}Qh^viQO8>J~S#`;cqwQVKt zWj}v?Woj4j4Q5N<cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff 
--git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G1/filtered.hsps.txt b/tests/test_data/outputs/extract/G1/filtered.hsps.txt new file mode 100755 index 0000000..65ab8d4 --- /dev/null +++ b/tests/test_data/outputs/extract/G1/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 11 112 102 0 100.0 100 100 plus 1.84e-51 189 locus_1 11 112 True True True False False False False True False False False +9 0 762 22872 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False True False False False +10 0 858 22872 0 857 897 1754 858 19 97.786 100 100 plus 0.0 1480 locus_11 897 1754 True True True False False False False True False False False +11 0 972 22872 0 971 1766 2737 972 0 100.0 100 100 plus 0.0 1796 locus_12 1766 2737 True True True False False False False True False False False +12 0 1098 22872 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +13 0 1281 22872 0 1280 3858 5138 1281 11 99.141 100 100 plus 0.0 2305 locus_14 3858 5138 True True True False False False False True False False False +14 0 1434 22872 0 1433 5150 6583 1434 0 100.0 100 100 plus 0.0 2649 locus_15 5150 6583 True True True False False False False True False False False +15 0 1464 22872 0 1463 6595 8058 1464 15 98.975 100 100 plus 0.0 2621 locus_16 6595 8058 True True 
True False False False False True False False False +16 0 1836 22872 0 1835 8070 9905 1836 0 100.0 100 100 plus 0.0 3391 locus_17 8070 9905 True True True False False False False True False False False +17 0 1914 22872 0 1913 9917 11830 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9917 11830 True True True False False False False True False False False +18 0 2037 22872 0 2036 11842 13878 2037 16 99.215 100 100 plus 0.0 3674 locus_19 11842 13878 True True True False False False False True False False False +1 0 285 22872 0 284 13890 14174 285 17 94.035 100 100 plus 2.3600000000000003e-124 433 locus_2 13890 14174 True True True False False False False True False False False +19 0 4935 22872 0 4934 14186 19120 4935 0 100.0 100 100 plus 0.0 9114 locus_20 14186 19120 True True True False False False False True False False False +2 0 327 22872 0 326 19132 19458 327 0 100.0 100 100 plus 5.47e-176 604 locus_3 19132 19458 True True True False False False False True False False False +3 0 417 22872 0 416 19470 19886 417 11 97.362 100 100 plus 0.0 710 locus_4 19470 19886 True True True False False False False True False False False +4 0 444 22872 0 443 19898 20341 444 15 96.622 100 100 plus 0.0 737 locus_5 19898 20341 True True True False False False False True False False False +5 0 543 22872 0 542 20353 20895 543 0 100.0 100 100 plus 0.0 1003 locus_6 20353 20895 True True True False False False False True False False False +6 0 606 22872 0 605 20907 21512 606 15 97.525 100 100 plus 0.0 1037 locus_7 20907 21512 True True True False False False False True False False False +7 0 642 22872 0 641 21524 22165 642 0 100.0 100 100 plus 0.0 1186 locus_8 21524 22165 True True True False False False False True False False False +8 0 684 22872 0 683 22177 22860 684 0 100.0 100 100 plus 0.0 1264 locus_9 22177 22860 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G1/processed.extracted.seqs.fasta 
b/tests/test_data/outputs/extract/G1/processed.extracted.seqs.fasta new file mode 100755 index 0000000..8130bd5 --- /dev/null +++ b/tests/test_data/outputs/extract/G1/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga 
+>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G1/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G1/raw.extracted.seqs.fasta new file mode 100755 index 0000000..8130bd5 --- /dev/null +++ b/tests/test_data/outputs/extract/G1/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G1/seq_data.txt b/tests/test_data/outputs/extract/G1/seq_data.txt new file mode 100755 index 0000000..e2c836e --- /dev/null +++ b/tests/test_data/outputs/extract/G1/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 100.0 100 189 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 897 1755 897 1754 97.786 100 1480 False True True False False False False False False 
gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 1766 2738 1766 2737 100.0 100 1796 False True True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_13 
12 1098 2749 3847 2749 3846 100.0 100 2028 False True True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 3858 5139 3858 5138 99.141 100 2305 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 5150 6584 5150 6583 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 6595 8059 6595 8058 98.975 100 2621 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 8070 9906 8070 9905 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 9917 11831 9917 11830 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 11842 13879 11842 13878 99.215 100 
3674 False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtga
aattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 13890 14175 13890 14174 94.035 100 433 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 14186 19121 14186 19120 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccga
ccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgt
ccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 19132 19459 19132 19458 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 19470 19887 19470 19886 97.362 100 710 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 19898 20342 19898 20341 96.622 100 737 False True True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 20353 20896 20353 20895 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 20907 21513 20907 21512 97.525 100 1037 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 21524 22166 21524 22165 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 22177 22861 22177 22860 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G10/blast/hsps.txt b/tests/test_data/outputs/extract/G10/blast/hsps.txt new file mode 100755 index 0000000..de2c58f --- /dev/null +++ b/tests/test_data/outputs/extract/G10/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22643 1 102 12 113 102 0 100.000 100 100 plus 1.82e-51 189 +1 0 285 22643 1 285 13662 13946 285 0 100.000 100 100 plus 1.04e-152 527 +2 0 327 22643 1 327 18904 19230 327 0 100.000 100 100 plus 5.41e-176 604 +3 0 417 22643 1 417 19242 19658 417 0 100.000 
100 100 plus 0.0 771 +4 0 444 22643 1 444 19670 20113 444 0 100.000 100 100 plus 0.0 821 +5 0 543 22643 1 543 20125 20667 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 22643 1 606 20679 21284 606 0 100.000 100 100 plus 0.0 1120 +7 0 642 22643 1 642 21296 21937 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 22643 1 684 21949 22632 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 22643 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 22643 1 629 898 1526 629 0 100.000 73 73 plus 0.0 1162 +11 0 972 22643 1 972 1538 2509 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 22643 1 1098 2521 3618 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 22643 1 1281 3630 4910 1281 0 100.000 100 100 plus 0.0 2366 +14 0 1434 22643 1 1434 4922 6355 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 22643 1 1464 6367 7830 1464 0 100.000 100 100 plus 0.0 2704 +16 0 1836 22643 1 1836 7842 9677 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 22643 1 1914 9689 11602 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 22643 1 2037 11614 13650 2037 0 100.000 100 100 plus 0.0 3762 +19 0 4935 22643 1 4935 13958 18892 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta new file mode 100755 index 0000000..6fb4a83 --- /dev/null +++ b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatt
tttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcagg
ttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattt
tgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtct
gccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatcc
gattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatat
ctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcg
ctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgca
gtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatg
ccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaannnnnnnnnnnatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaa
gttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgtt
gcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(Gc zhlE@!{5mI;N-if6o6T}dZp-C=UVNSx&&$t?=fwkH`P$!bXJfw5NQ^_M`mG`V_4j?P zGvC%ejXl}zR%_&(?G{&!qnEvdFmF6ausn(TU~QttI_hEgh$55B{bT7GiT{N(^4mf} znhVb1<_?!fEIl_$&vcCDWlaikAnj+&Ky3%-X)?&3X!v)j{3d~e<5$#uE?Fx69(g_9 zAQt61VL&x+xJ^tDS#2D7SIz zE|Q=lCKU9Ag7D6aA-KuOvg$7H)_7aEp5%plq&tbRqJV-Gbj@%e-Pt$o+^&@k!KMbrTDHilM{Sbr)`6`X^3;i zwJ4;dQZ8(WsLXd4ltoztC61XktcoCXGjUs)!?oFl4c?P*OGs3ubZKv5i(KzE;}_kY z=NBdlasuO7wFMt>%+ZjSf`fbgtF-CWQz9RYku_9>lyZ2h&vgxwmqVbm2yZ`j$IZ(d zD+I;zWt-lvA_1emZ-oZg=RiDo?Zk#$@%#pfy0Y|msxhTa;=nDu2iUF?T<{B$>^Va7 z&HL<&YGFtsF&C9OC)NiS8|3?eGyXgIw;YBOW56>rAdKR>;w;fUAM#g3{`fm@#RGF#D({NA2awj zv@X!MoYcboCzAa$Md#z#t5kbHk_m__`!(IOtDMvfr#)<&u$I+R#m*ApoG%a@JXB$3 zt7ee6e(vGQg>psnN1SP5@cdNeZ^Be-tk=L2m%v@ze#|42@&+;EWz-ylu}mYLa;|GD z9~4SV`NhGWUDXtycY72sV&6)Z(Mt^QM+{Mk$=s`ILAry~~>z$A0nZ(_?Y< za-6theX;1N6{^a1V!1)OpOm!neUVkIQ1W;YUn`ZL>PhE}H2}tGGYs!oMDi08)g57+ zU%1KLg*Tq?W&=alQ<%O-2mMb%bU%exZ zYIeqM;rGbf@>4z(ZfcA^b0t&`nmJo%==~KUejMeW># 
zpZ|k^bA@5M3S(xk2+MlXvm)can7Xe8L=!vx)LyJve+BV*1jiD@v&v{*%86COaJ2}x z7N{a(OxGfer8lx5dWrwZJhC4GjSt>4GfcJO_z->7f@6Xs9l+^W9Ynao*~{y8=|-xY zXU@gx7Vd1)qjNEaQLEoLha>NWjB;$sOBT`ooi*vwlH7R<-N`~k9gMKx`AKn{@VV$o z{U@xejkVNdcb8qb4Azgc}ECH2?p?rF z&#K|Ngo3O#ppeor!mZ#S&^;2|YNxnfibi{OW~w&ZTe({5Z-T+&tQg>ziSr($o?WZs ztYtJ}%$NokHT;UY`{XK}^msW4qy*cPxPMT!6s)6}x@1%=IZt98rBPB=tRm>!BCBsT zc@YV0aR@k_EhkBQ{ILG?<$P5TXYjw*;$PC;26Ym*8obaJ5}0W`rstCuW9j*<8xeOp(F#ydW%tO2ExcoPFjg)qUdK)h`H4JI!n-A;F`+5Xq9n>bD`6@8-GPA zxZ4Z$i462XMF(j`THwI%7C_0x-sgqj@|$Q7eQaTF|B(bE@en~;7*B}BK{FiMK-g`S z_b1qz*X#@Ri7(>%FE?gh)=Mc&^!;ZR8t544E@koVX6XZEthr%9kbE)fTy*FtKZ! zfz9OZ{tT<0_N)&zesAe%voi1VEa z|9&2d(YndO5&jL;$Znd{=Nha*Tk}_%bsjr}n9}T;Kh}=YNk=YW_uL-?J**$ILXdyl z2yqO_=;wSa#tlVU7S(!ZZil+JiL>)|L&=B$R88&n2$H-CLq#sZ~mN=x6s=tX0H?v9|g(9u11nO z6~*?Ja~oz)*4c0FAz!V@qv)=}(y1aE2`+JgIz_EiCxAmJ%>2|}@b+tua=|PKqCIqn zzm~>1q20`2m&wd9g?_Sq)@NQV4IZeT5aIP-bEosx4RCI?q8kno-c^WaRkP0FWc{~K zTg*`uA>YqNaT-UjIb`QuF}7((fF{<@pezh3uNMX5|dziId7RtnU2U!}jj0_OA~d(Wo_M`2XC z^^QV0X(Gf%9$e?J!8nfQ=cC4^{O@eG^TeB@SgYicMI^ zP$lUn!gO>raWwbZlY#1{W0%M~!Ur03SNyxP2NrXc)K8{k388VO5Rpg)MyKUKZ_{j5@B|^B7p=+j`~8fI zV^-Rao|0de;Fy2b_CmRWLeKTjX0w5`Nt&lP$U{%$`!K^n-{>fk!O_<2dV6bVE?`jj zi2Fn%-68=q#RAou)2;QA>V=I{MXsR}11=;7omdAKZ28bheIWy*jtejz2%aLZELOl5 z`|o{<+&1ZXQ-(8At)kD(EQL!C3|2juc|>D(wubjo)n`4$0|3bt;f_wR2E4*Y>CUYv zvzFU?-Dwd63EwU0J8~ zdZb;Kn16hr>-2Q-$VmtP>>J!9X0nz@O*-ryuxvzK4%Pb(^!B~I0T*ys>qr2_L3iQDrDl>LKlG9Lr z36m_A6fLuDsaUL`W0q1GifQ3@8UY5KYGNPg=u*w|+Cx?yzG6R@;4CP6h4JxHU(%Uq zVzL92R>A$N6BUv_>xEOjZ{#Rv;}C zeJ*ocau5q6_`BD(ZwC44ge2&Y5?k_&Ru4o&9w=GZn)=d}>?j`= zrzy8Js!c}zK}}h`MRv&$B5k7WL*d;!B<10%2wP(2^cBXK2xXUL_u-_UTGT9BVIa%j zJ8q0cN-@*U!Mxh<8Sb`*?=Mlc?eLRxM>I*OPVae;q*QdsGfj>VjBQ7%=CEEfFA#`F zb?Z>CF7G?yIt`yshhT)1XSktt`G231_HKN9UUgDBp(0O4u9CA$d637t zyoK6A-DpxA)F`bmSpCSFN5&l@>>57$#RLYE^Qiu+Y4a@V zHW+?0%XyjX4Q_w6`yWko*UyE^NL%uAbXJVcKV)&wI$jk7$Q*nJ^jB{@xviI6ba zSFn)P&PL@z-XSE1arf}Ct1Nq|7;e#*hL1IuIO7&pE;o);c$E(W($5(yAJ)%0p}V@T@EYJV0UL*au2D<4#CnZX!08%Y{THRrZ@*n 
zH%^N8tP2anCHacuu!o!;NDwis6IViJN?dRELKpH3j_FQXd2WSLND%!dkt>ue9VDx- zh23*p#^*kw{V*~Vx5IQP?dw!dPSa76$wGs>79zAL z$6HuukhS_(qryA&=HCYY#L{+nvixyw>jWjd;2wZ{j#>8><=eBl#N7*GqxwX)rDQ*I z(+)_N7Q$w9&@qYQq{#mTwG_#phH5C{3VA~Ey+{;td1D$AYf)UZjU9=EU`qH<-31Tu z5K_zG1H9c&VX}uss9;rF_Xbuz2*6d4nk1I#bRq0rQg{tF&~!*q>~J>y-_QF@1x~zNWRnilNHkvRv3W-%Qe`pfI`qA$wR%7PK`R%Ymj-07yqx3pw@O5Qh zvR&{3oA4;47+x8EE0-6^vAvqMPWFC3Ca~gwQF| z(R+C6Yx%5z>Hh@9*X=+nRwznXu;|3Dz8Fjw5meiP)xzv8Q(QTt8p6`V3uMUbg-@{t zEE>ptEcA#}KxT_TLg_tTc8wR)S0eX}U~c%T)N`sCnW%?;I|}9b6D<<6?Q6<%`5tFc zWqm==w@yv;+qM1q)%kSX}#Vw=6L!`L5`USDcwq0HrM5YqS z)q=E=YI&;y3I2IC;GVYd=O+SmgzX(bghQFoo*UmSndlk89ZQ~W^(Z5*A4FfiKc1xb zR{*+~M-jh9=6!rm%}u4El(v{|O4>HlDU3Quf%J4l^cq&K1V@_<{&}39Ig4@THHky7 zlOsS2zM}CcZwD$TmvxWxnl`CGo$q%xgVTQ0x@e*44sA_TO6%LNkIkbI(yJhmSWS3h zs8awbGMh9QU4(v#KoP!j<rtWad#JA(igv8JY>XOB_s1fb4#t_HhaAlA{A=FlI&nN(F=(fiQhFw8&E_lZ_qcJ z?kXRDZzT6}*Oq%e)VGp*DIzaCO)1nPWg>hhUoq6H10CPlaLf(uW1P$3D?Cq(Gc@T{ z9&fmYRw6A5o%D}dmCF~Go|C3!uUc_k#3y6LyR?H*ldkasNp)wUI?$w0q@*HN=&tgP zXh)t>C29HrS`!v^pPp?QIq-zu!p||6qRkY`QHrngjZHiyo|!Ivj064emTDVs?YSM6 ztl)2_Xi7@6!M_DwXZByiZ>=S&zwkI?>`m%i`M|$*+{U>jk+y2$xicks9%$(v3!=Ty zdU3jEPxEbRVB+1(+&9*@W1W-RL-ji(yxLwTlamg;&V!$GTzV6hZ;(EZ08?EUM4J#B zx%Iq=fzI#SNT31E5$<90XG}byLa;9xlEO#wKFi@5!SbO0PuP#GDLyYSXb@&{!FhHU zXIqulR7Iq9D>ZQ|_!@TT; zi=@T&ZA!7rxKWOvwRw?LCcftBd=pt6c@>|bR1Je<9@{J%^H22RKp+d*nBIa@4-F02 zak6xwLa8y65r71%&3`oFY=8xgf0y>zR1yONhCRV8vZXcgZ_Yg6Bwj1#^!44+P>(SJ zjxb$;H+5Tzr|o{OM)g$#FNj>)rhQ@0yy(VaEe8mf`?OAPaBso%18)dv>(T4NnATNl6 zlR5aO?seRBPf&a`1m%q;N#LWua_zJH>DU0YSoq&Fa0EKp5Z2VVHh~H1U9v6a4BEr4pW?3Yb=e4g!LO#=5O%d= zFP45?PsJ})`TyVZCkX%m=*s?!|F|jx6mG}>#bp_w+9LxrO=Q5%12RDWy$mp7$pG{J sy8Qt%!2UlRKpB8~BLjSlWWedaWq@Cn3^?;#1_YYPfb%snAUOR00pLAF6951J literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary 
patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G10/filtered.hsps.txt b/tests/test_data/outputs/extract/G10/filtered.hsps.txt new file mode 100755 index 0000000..2a6f59b --- /dev/null +++ b/tests/test_data/outputs/extract/G10/filtered.hsps.txt @@ -0,0 +1,20 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22643 0 101 11 112 102 0 100.0 100 100 plus 1.82e-51 189 locus_1 11 112 True True True False False False False True False False False +9 0 762 22643 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False True False False False +11 0 972 22643 0 971 1537 2508 972 0 100.0 100 100 plus 0.0 1796 locus_12 1537 2508 True True True False False False False True False False False +12 0 1098 22643 0 1097 2520 3617 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2520 3617 True True True False False False False True False False False +13 0 1281 22643 0 1280 3629 4909 1281 0 100.0 100 100 plus 0.0 2366 locus_14 3629 4909 True True True False False False False True False False False +14 0 1434 22643 0 1433 4921 6354 1434 0 100.0 100 100 
plus 0.0 2649 locus_15 4921 6354 True True True False False False False True False False False +15 0 1464 22643 0 1463 6366 7829 1464 0 100.0 100 100 plus 0.0 2704 locus_16 6366 7829 True True True False False False False True False False False +16 0 1836 22643 0 1835 7841 9676 1836 0 100.0 100 100 plus 0.0 3391 locus_17 7841 9676 True True True False False False False True False False False +17 0 1914 22643 0 1913 9688 11601 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9688 11601 True True True False False False False True False False False +18 0 2037 22643 0 2036 11613 13649 2037 0 100.0 100 100 plus 0.0 3762 locus_19 11613 13649 True True True False False False False True False False False +1 0 285 22643 0 284 13661 13945 285 0 100.0 100 100 plus 1.04e-152 527 locus_2 13661 13945 True True True False False False False True False False False +19 0 4935 22643 0 4934 13957 18891 4935 0 100.0 100 100 plus 0.0 9114 locus_20 13957 18891 True True True False False False False True False False False +2 0 327 22643 0 326 18903 19229 327 0 100.0 100 100 plus 5.4099999999999994e-176 604 locus_3 18903 19229 True True True False False False False True False False False +3 0 417 22643 0 416 19241 19657 417 0 100.0 100 100 plus 0.0 771 locus_4 19241 19657 True True True False False False False True False False False +4 0 444 22643 0 443 19669 20112 444 0 100.0 100 100 plus 0.0 821 locus_5 19669 20112 True True True False False False False True False False False +5 0 543 22643 0 542 20124 20666 543 0 100.0 100 100 plus 0.0 1003 locus_6 20124 20666 True True True False False False False True False False False +6 0 606 22643 0 605 20678 21283 606 0 100.0 100 100 plus 0.0 1120 locus_7 20678 21283 True True True False False False False True False False False +7 0 642 22643 0 641 21295 21936 642 0 100.0 100 100 plus 0.0 1186 locus_8 21295 21936 True True True False False False False True False False False +8 0 684 22643 0 683 21948 22631 684 0 100.0 100 100 plus 0.0 1264 locus_9 
21948 22631 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G10/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G10/processed.extracted.seqs.fasta new file mode 100755 index 0000000..8a56068 --- /dev/null +++ b/tests/test_data/outputs/extract/G10/processed.extracted.seqs.fasta @@ -0,0 +1,38 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_12:11:0:2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:13 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:14 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G10/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G10/raw.extracted.seqs.fasta new file mode 100755 index 0000000..8a56068 --- /dev/null +++ b/tests/test_data/outputs/extract/G10/raw.extracted.seqs.fasta @@ -0,0 +1,38 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_12:11:0:2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:13 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:14 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G10/seq_data.txt b/tests/test_data/outputs/extract/G10/seq_data.txt new file mode 100755 index 0000000..d72cb6f --- /dev/null +++ b/tests/test_data/outputs/extract/G10/seq_data.txt @@ -0,0 +1,20 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 100.0 100 189 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_12 11 972 1537 2509 1537 2508 100.0 100 1796 False True True False False False False False False 
atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +3 0 locus_13 12 1098 2520 3618 2520 3617 100.0 100 2028 False True True False False False False False False 
atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +4 0 locus_14 13 1281 3629 4910 3629 4909 100.0 100 2366 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +5 0 locus_15 14 1434 4921 6355 4921 6354 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +6 0 locus_16 15 1464 6366 7830 6366 7829 100.0 100 2704 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +7 0 locus_17 16 1836 7841 9677 7841 9676 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +8 0 locus_18 17 1914 9688 11602 9688 11601 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +9 0 locus_19 18 2037 11613 13650 11613 13649 100.0 100 3762 
False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +10 0 locus_2 1 285 13661 13946 13661 13945 100.0 100 527 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +11 0 locus_20 19 4935 13957 18892 13957 18891 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggct
tgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcc
tggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +12 0 locus_3 2 327 18903 19230 18903 19229 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +13 0 locus_4 3 417 19241 19658 19241 19657 100.0 100 771 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +14 0 locus_5 4 444 19669 20113 19669 20112 100.0 100 821 False True True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +15 0 locus_6 5 543 20124 20667 20124 20666 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +16 0 locus_7 6 606 20678 21284 20678 21283 100.0 100 1120 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +17 0 locus_8 7 642 21295 21937 21295 21936 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +18 0 locus_9 8 684 21948 22632 21948 22631 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G11/blast/hsps.txt b/tests/test_data/outputs/extract/G11/blast/hsps.txt new file mode 100755 index 0000000..f0d9a4a --- /dev/null +++ b/tests/test_data/outputs/extract/G11/blast/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 23855 1 102 23844 23743 102 0 100.000 100 100 minus 1.92e-51 189 +1 0 285 23855 1 285 9965 9681 285 17 94.035 100 100 minus 2.46e-124 433 +2 0 327 23855 1 327 4723 4397 327 0 100.000 100 100 minus 5.70e-176 604 +3 0 417 23855 1 417 4385 3969 417 11 97.362 
100 100 minus 0.0 710 +4 0 444 23855 1 444 3957 3514 444 15 96.622 100 100 minus 0.0 737 +5 0 543 23855 1 543 3502 2960 543 0 100.000 100 100 minus 0.0 1003 +6 0 606 23855 1 606 2948 2343 606 15 97.525 100 100 minus 0.0 1037 +7 0 642 23855 1 642 2331 1690 642 0 100.000 100 100 minus 0.0 1186 +8 0 684 23855 1 684 1678 995 684 0 100.000 100 100 minus 0.0 1264 +9 0 762 23855 1 762 23731 22970 762 0 100.000 100 100 minus 0.0 1408 +10 0 858 23855 1 858 22958 22101 858 19 97.786 100 100 minus 0.0 1480 +11 0 972 23855 1 972 983 12 972 0 100.000 100 100 minus 0.0 1796 +11 0 972 23855 1 972 22089 21118 972 0 100.000 100 100 minus 0.0 1796 +12 0 1098 23855 1 1098 21106 20009 1098 0 100.000 100 100 minus 0.0 2028 +13 0 1281 23855 1 1281 19997 18717 1281 11 99.141 100 100 minus 0.0 2305 +14 0 1434 23855 1 1434 18705 17272 1434 0 100.000 100 100 minus 0.0 2649 +15 0 1464 23855 1 1464 17260 15797 1464 15 98.975 100 100 minus 0.0 2621 +16 0 1836 23855 1 1836 15785 13950 1836 0 100.000 100 100 minus 0.0 3391 +17 0 1914 23855 1 1914 13938 12025 1914 0 100.000 100 100 minus 0.0 3535 +18 0 2037 23855 1 2037 12013 9977 2037 16 99.215 100 100 minus 0.0 3674 +19 0 4935 23855 1 4935 9669 4735 4935 0 100.000 100 100 minus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta new file mode 100755 index 0000000..68af8eb --- /dev/null +++ b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnnctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatnnnnnnnnnnntcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccct
cccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatnnnnnnnnnnnctagccgggaacaaattcaccgtctaaaaagagttttccgttttccgcatacataacgagcgcattaagatagtccggatcaaacttcacgccgcgcttattttttgcaatatacgtcaaacaatgattatgggtaaaaatgactatatttttattctgcgactttttcagtaacgtattgattgaagcataaataccgctgccgcaatccatcatttttttatccgccgtaagcgacctgcctgcggaaaaccaggttgccgactggatggtgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctgacaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctccaggcatgttgtgaggccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacnnnnnnnnnnnttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatnnnnnnnnnnnttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgagttgctcttcatcttctttcgaacgcatgtattgtggatgttcctggaagaaggtaagcgcctgttctttggtttgtttatatttttcgcaaaaaatgcttgagctgattgcgctattttttgatgcggaattatcagcgttctggtttgataatgatttattcttcgcaaggtctgacggcacatacggaagagactgacactcatcaatactatttgcgttggccagttgctctttctgagcgccaggttgctgtaccggtttgctcacggaggaagggaggggcacctgggcacagccgatcagtaaaaagacaggaagacagctataaaattttttcatnnnnnnnnnnnttagtgcgcttttacctgcctgaaccagtaa
ttttccattttcgttatccatttcccctttttatttttcggtattacgccagcccaaagtaattgcagctgtcggttataggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtccttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactgtacgttcagtttcacacatttttccccgcagccttctactggcggcaatattgtcatggtataaggcgggttatctgcagtggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggcggcagaaaatagcgcgagcaaataaaaaggtattagtttcagnnnnnnnnnnnttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatnnnnnnnnnnnttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatct
gtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcct
ttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatnnnnnnnnnnnttagccctgatgcggcatcaattccgggtggccttgtaccggcggcttgttgctggtcagcgcggcttcatcagcctgaatgctgccggaattggccgcccatacgccttcatgcgtgtgggtgatctgctgatgctgcgcattcaaatcctcgcccattgccgcaatatgcgtgctttccgtaccgccgctgttagtcgcccaggggatgacgggatcgctggcaaatgccatgccggaaatcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatnnnnnnnnnnnttagatattccgtaaagaagcaa
aaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggtaatttcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgggcaatagtttccgcgtcgccgtcgaactgccagttattgccggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcacgcataaagaacaggatctcgccgagcttgccggtctcctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgatctctgcgctgacgccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgttcacttttacgcctgcgttgcgcatggtatccagtttttgctgacaaacttccgtaccgccctgaatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtccagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacggccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgacggtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcaatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgt
ggtagagaattcatnnnnnnnnnnnctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatnnnnnnnnnnnttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgcc
gttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatnnnnnnnnnnnttattcctctttctgtgtgggatgctgtcggccaaaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtggggta
tgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgctgataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttagggtcgtgcggtatttgttataacgttcgcgctcgcttttgagcagttgattaaggttgcgcacaaggctggtcagctcacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgccagcgcctcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagcgttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaaccattccggttgaatgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcagcgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgctgattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcaccagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatnnnnnnnnnnntcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaag
gcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacnnnnnnnnnnnctagatcacgtattcgatcaacgctggttcttgtttacagaggcgacgccagtcgacaatcggcattcgtacctgcagactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggatttcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagcagcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtttcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggtttcgagctgggtttcacccggaccgcgcaaaccaatcccgcctttctgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgttgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcggaagagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaann
nnnnnnnnntcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatnnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactg
cccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnntcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccagacgcagcgtatgttcgctggactcgcgcatcagcaggccgtaaatcaatccgccaaccatggaatcgcctgcgccgacggtacttaccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccacagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgtgcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctttaagaccggcgactaacgcttcacggctactatcaaagataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgctgacgcagaccatatcgaactgacccagccagctcagggagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccggaaaagttgaagtcggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcactttggcaacgttaatgcctttgcccgccgcgtgcagacccgtggttttcaccaggttcacnnnnnnnnnnnttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatnnnnnnnnnnnttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.ndb 
new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(_uM9+m(A literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..bf3dda2 --- /dev/null +++ b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G11/blast_db/contigs.fasta", + "number-of-letters": 23855, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:10:00", + "number-of-volumes": 1, + "bytes-total": 43165, + "bytes-to-cache": 6209, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..7c5a6c009883188f70f10885b4e3d7bf3749b25a GIT binary patch literal 6057 zcmWOAdpy$%9|!OuD{?!m)pU*$CP$rIW<`XfX&WOy!sclieq&~-rGGh1_NK^vCz}$LIC>{QrJ^VJ~-z1Cw$6niZa=wCLmf z?dfV0Ir2x;Wprn*+c@!g#G(Hj_!;AYali5ID0h{5Vf-dYz&M5JoyYBz2iioJT4xY$ z;jTUF53Y}Ie?OhBI;C{12hrrh-G{ZqqpvI+G)e~ovLX~iGF%>+Xur7=T`*Zh; ztle!J zRFy_^|!srTxq#vnm_veJc2I|H2@sv`AZZ=|k 
z>{pNCsRwiaK3iU<$pQ~WMC$IX960K2loC*8R`F?%W5$L!_1b4OW(L`yC@x}e*}+1p zga_#X<|INzCB0`_VMeL(uP(nVlGI4|b2zMF)h6RN}S5s{`Wn{%k@+_3lW+ zLTd&#;R$Fxz1*XKjvq_zqE=tac^nPqY;hDQ0ebC3XDco1BOP4;BUy7|=a_~&SOO$A zyZ;8B^HnqSEBh|U=&gidzHD;@R(dLv5J8P}zgjgz&|iM;K(xcG!awiVC7atO9>{-S zzpE1$3e4c>z)deHn2o*s*f*N|S1c_f_YZAGKfYO85~j}AB`hynw{=R$h9pmo3;!vN z%`VZkS6nTviHUGeF}U{4_?t7sLO*p2`h3|agj|j=44rzAURE_1w@r>bS0GK>Ksl6zNLwCJst>Dq${?7+ zR$Vs2T+S9^g9rsZowj80RBEkpqzl7ysb@FrIaDSKGSOnjd!g=2p9aoeyFy39tUC`0 zbFwg9*eMRC0|EJUH`I2>9Rwy#Pm8&uvOCs$wkN-uYFqkip!V12J2W~&vfrmT`$q>+ z2M@jfLcOB@6#SA}`{k8J4m0}AHV=BE$&GXGOH5gJn3x-lu}@~0nQyw9ezxZ?PqE5x zDm57*u6(4dpN%WI{U`3jVLql#+w_X*8JK4NSjf@2*&9$z4O=^Q{u9T|N1MJQi^MBoK-q&lUImSJ4T2lEGadc9;_HROAoRkZDAgca6bPnN2 z*I_L6;3(5NFk(&rZzOQ)=mk6foe8(i2bTFIB(y-!I*(m{JmLPLB?SYY}# zhK&glD=%4awlEl=viIldS*;(DcNPtBCD2p@FE2#{B-t?Ht*dbe$fYb$I26>`gvhW4 z7Rg=S*G;2WB#5EE{X84z_vgb`RCaxk)%9#XyBi(l>aX*hs+Pvs9Rn;Z}Hbp>YAB?EV;^`Xi$V@T3!2)jvej1ven~rdy7mijBf{LdxbIx#M~Ih~ zC2bwJXzKTg+Wt5tYWoU)s%YAbSM)ChMEmg1%qD&y0{l$h@b#dn*K8=N_FoEqKI5jF zJjD}SJgU2ZrgxS^K7Fk84TXryl$DB>AFZ4HbdN}dkioo|eW$wGYC5z3h96zaZqJ_z+XP3n}6%bn)oxK{+iWF(c z^aOhrPY%LjP@qjA`-D%1D#2jb%H>9aSd(!DP7SK0u*8Nn&OOh_-5DZF!uE*^>jjGu zt?-moTrxAOA}>BhV{=DM;N@Ir)HeJLFs<&jhcLwJV*2H4Yw~y$0!T>CzBVlNAZlEY zX(w%gV@itepqz~}jSG&w)gN5eo{AJw=)%`N^V5ygOozo(mwoSQP&sjQGXFG7GDii@ z*p%bpaP~M$HWa&j9M41*1_s6JjcX`s7jd5pdgNSK5PozWvr)(I&3*=rv?9>~ zKJKILEo7=%PNlCVY;cZLil-h7(+q3#;E%f#n0xDysZycE`^W_*yt}Mi2b?-Bj2e%l zftLMTiQt~`)_g`9w4|_*aE~16^&L3T?cLC_aTt)$wLL}|1}l*%dPHwN4K3%!#guGT z{6?%-G;#0np6zi8_x{7U_v`=>L_g6eBZ$ioDVE~DndAH3@Msd(YYwbgrKc-Q_>EAD zGZ5F&WB%%X>wrYEjfukiHZaoKxdXUp-hwR?uSLXSW9kN$OLbd8CUY0|Wcg)>o$Qs5 zd?|=FYfDPc!Sk=2w2C4;9yvDJ1@3VI`KcHIuwr(fEskXgAi{&fHCf-LZT}jFT7XdE zg57$;rfvf5bsZDBg^I?Ws?>tND4TuN^76jTgshY2-xW$-K^GgyXH?c)b+6c+PI0&L z+D3V$Yef#6=N=mJ0KV=jIbD@>q#_-TBILwhM3rdIli<_iP3Yi^vil1+HOH#B(Z?#v z^k|ZbTNwJZVXNBVs^ZGyL=SW-T$H2xFo=LEvhUB`uKV@;nz4#k6b!N8>{#`jx9?bx z`8C4}eK~C<%UTUL2#sX^pd@z4?ty_gt5}heDgkZRUwk%ypp1{IfVw2mV(LrQpUk!4 
zFIW~7H`NW{YiXI}N4C|caaIW)&C-I3rx*+k~OKuuE zGXTj&-XpgvgTSeGjnv*;lx3Um7_^cbBO=*;Z}@j=1+q#AGnAkodr6Lftn<&W zC^-kch>t(=!@DIDT!e~A;t74YL|;=od@yNK(q8ydwV==};IV;vTv5|bMmg*x?gz70 zm`NX@#h1@Gg{I1KyQayB^Uyf|8`tp(lVRn-*CCS#vD|Pd=iuo20n)ne-nwF3R>AbIY&MV?RqeGM7rIr#d*{9jkg4&sX<>RC})5RrTZGs^mF|B zNvL9;zI{Mk=Pf@f=+)W0V^$zm2Gj$Zsq~ElorPnHtcPG@Tgd&gB8DoX9qs3*0f@qn zHA9EbYH<@Q~2I=+89kwrc`6qg%W zDLwT~8B~$>5db3MU&zO61Q8wiqvvK?A1qH@S)pqk^l8io=|}tV#M(vZvl!L~ec|0i zdm-LF{L;`28(NO|d~fIcgx%O_8E4beSIB4gV|zT#?zUi0qOkortS_@E@)Fl3r^Lk^ zze)cU>+TryZozQ4chB+k)1>%KUcNo#`Fc>x{&T==RL;*M4s1YmO40&Z#&@@ zGJ5W4A83>2+*`fY9%#C)_h!6|UjPx1!9D}2v``Kq#C)D&l>PHhIVh>?#P~&QHhlM< z#)wzQryi&q#ajGCH-%mU`B;+GB5+?@>2_oEziCUH&WZlHf|7 zfC3h)U9FMC7llrtK&%WPQT(y;sTHFx&OhBZb*afPLdv0Vs{C-%Au?#I$OQ}~;qZIs z*S)oN-IUi^ihfI^90N;KH-UG97bQbbGg!Apx$H0X>MqEs=88_AZ*=rz43eyilPnl ziA7{JWfkP{vl;NkU~=HtjDpAI2$?mloi&sU!KL0m9Hslvr=_BxRK-5r@P)YBPW1jX z^K6U#veQyGYSY$4m>#5ix2OeYCY-Oj9u3~uIl~ZDV9B<+d_1@ZnYyqyPx>NJJ_iVa zr=?KRz)Yf+(7P<%cbv0T9Ppzxq~M8tMLCVLjLarItq&xl^19zO&2RzW9XpcO&{xQv zdz*UUaqb0%7kq25IfToy{3OYrU{eyl4RoBN(`;;l1fNodf9{sXW(a$JAYN{je5e=P zVeKD`G?+BP4vh%;I_FG&!uxodh(GylR|M{$X%DZT3-_|vpEBkgY~kD+HY;)1@g_@T zMVve3Ev4O_Y!+QK@g;Xcjw<0~wo~{>%B2B|{mldnWxjYS^X%y2O5Lu6?LSRw(a~a! 
z)@PPI^M8%X`fsnc%ZrMaxyv)PkV!?gtuN~V<&d>nG?6pEkAC%Uh#-jmIl(rF$jCWd zxBI{;KOI%MbwZb}Z4lzsABkVjW7DbzgM8g)*kuE($3zF`*;wQ+)%(eqVbXj8XMLZV zEi$?htG=@y(4RM7Rs>Jh zxNAbzg_RB)YK!G`g5{-?t=+JTu}Vd!(X==bx zldOjl%}8_lfnGJCyR#m&Th6ainu|`z{pJdBmenK?oux4OE-ToJUC`Af`ncPDabPPlnc=V1tTiQZxai0(XH@j3ZJRn7Q(5Gv zMCJ5Z2$_CvI`99#@hJV9jFnRgq>uMk9ao&&r`R8yDT4te$D0`=U>W9YS2rDNv1u5_^HsCjh4u* z)nk-2GT6oJ*?Zr*JMWte=WhR}=Sg(Wzw^^dv?<}KG8mrk5 zl1cM_IENUxksDe-;ki)joV^~YQVom1^!`T~82AgB1@e?9@>@3nes_UqQu~M!cVJ31 zWS5zHE(9Zb2B`$glo;(p%$c2;aP#IVpriN3LUHV)rKDhaz0KsH|7fau2TnNn^vJA_ zMYKK;tU(rR)ZW||cbbIM@o^w4Qp|!4Hg}WI#y=5(+`qbu(4g}9h&0~(DT}U=7X0}9 ze|O)b3RPmaC@S1s?DQhy^7};O%TF~Yg4sk;_E8IzlQKb`RFZ&beott6o>)9FU9Tmg ziQqZYH1?2J0}yB^1#x0Z${O&8zNE_d@B4FylF2#Mwaa5f0^tx4h`Iopc`Cf`tFg3A zXP45C2Wp_eRo9U>#N5ZdhBs*KLeqrsi5f&zM!llrN~w>`=Y@rh-oEmw7F1VAfsf58 z!CAnN_cheb2_GUtglCg*2eDNaxeo&bM$y}`00;r;UN)Hr_666y$63EGY&kC7A@>L+ zvI&IWb=h9}e`E`}*>L-J3JX;_4d$lGHjngy{ER@i1RYQH+aSKHm$}@ONYU2Y6=!S| zv8=Cxhd>gMtDq0og^-8(yG-gD2ct_h=QW1A{+D+>az*uIX$3NHXwy1C!;@87*JhQ! 
z1|mo6j($4D4sI{IS`dG!0D04@CcnT>zw+H*Z+@GfMX&VDn19<$L-Axvp2hBHra?$x zk!l=7Gj#cKe2n;@{QbF@K4OH957|2)4OE6RcOY20{Ksp;{w1ni(-u?u3Gy+t4$dqq zJFHm@KM5B293GV|+%Dc08e-hvEQ%gdBX|sM>Lv=Uz1a8W8Lw0`u9cLq%eXOgld#iI zKGrdH3Hi2m|7;YDO#5Yfv|w{8VTTGnqgW3mKeCLdod=q?is|mbO2x*I*ay0M#}Fh} z=rg^^@bIMhNps74CrCJcB(|OsT9B~TONl?)f7j>@@?_3}vh>It`Ms(FN%-A2PO-KX zYOl(PH|uTL*%f*>pYeA^`|*4|XtS?`kRUTkevznujoiV%#J9@a+ApUI{g7!^hh-ps zF@rwSS+dQ?<(3HLZueb^PAJ1_>EEP}glu^rM!#)Oq|mBgd_CW+6&EBw{KYUDo*x)b$bBol@VyHqtis6OcmJthg&nv4Ka4!4R{#J2 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/filtered.hsps.txt b/tests/test_data/outputs/extract/G11/filtered.hsps.txt new file mode 100755 index 0000000..cb9661f --- /dev/null +++ b/tests/test_data/outputs/extract/G11/filtered.hsps.txt @@ -0,0 +1,24 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 
0 102 23855 0 101 23843 23742 102 0 100.0 100 100 minus 1.92e-51 189 locus_1 23742 23843 True True True False False False True False False False False +9 0 762 23855 0 761 23730 22969 762 0 100.0 100 100 minus 0.0 1408 locus_10 22969 23730 True True True False False False True False False False False +10 0 858 23855 0 857 22957 22100 858 19 97.786 100 100 minus 0.0 1480 locus_11 22100 22957 True True True False False False True False False False False +11 0 972 23855 0 971 982 11 972 0 100.0 100 100 minus 0.0 1796 locus_12 11 982 True True True False False False True False False False False +11 0 972 23855 0 971 22088 21117 972 0 100.0 100 100 minus 0.0 1796 locus_12 21117 22088 True True True False False False True False False False False +11 0 972 23855 0 971 982 11 972 0 100.0 100 100 minus 0.0 1796 locus_12 11 982 True True True False False False True False False False False +11 0 972 23855 0 971 22088 21117 972 0 100.0 100 100 minus 0.0 1796 locus_12 21117 22088 True True True False False False True False False False False +12 0 1098 23855 0 1097 21105 20008 1098 0 100.0 100 100 minus 0.0 2028 locus_13 20008 21105 True True True False False False True False False False False +13 0 1281 23855 0 1280 19996 18716 1281 11 99.141 100 100 minus 0.0 2305 locus_14 18716 19996 True True True False False False True False False False False +14 0 1434 23855 0 1433 18704 17271 1434 0 100.0 100 100 minus 0.0 2649 locus_15 17271 18704 True True True False False False True False False False False +15 0 1464 23855 0 1463 17259 15796 1464 15 98.975 100 100 minus 0.0 2621 locus_16 15796 17259 True True True False False False True False False False False +16 0 1836 23855 0 1835 15784 13949 1836 0 100.0 100 100 minus 0.0 3391 locus_17 13949 15784 True True True False False False True False False False False +17 0 1914 23855 0 1913 13937 12024 1914 0 100.0 100 100 minus 0.0 3535 locus_18 12024 13937 True True True False False False True False False False False +18 0 2037 23855 0 
2036 12012 9976 2037 16 99.215 100 100 minus 0.0 3674 locus_19 9976 12012 True True True False False False True False False False False +1 0 285 23855 0 284 9964 9680 285 17 94.035 100 100 minus 2.46e-124 433 locus_2 9680 9964 True True True False False False True False False False False +19 0 4935 23855 0 4934 9668 4734 4935 0 100.0 100 100 minus 0.0 9114 locus_20 4734 9668 True True True False False False True False False False False +2 0 327 23855 0 326 4722 4396 327 0 100.0 100 100 minus 5.7e-176 604 locus_3 4396 4722 True True True False False False True False False False False +3 0 417 23855 0 416 4384 3968 417 11 97.362 100 100 minus 0.0 710 locus_4 3968 4384 True True True False False False True False False False False +4 0 444 23855 0 443 3956 3513 444 15 96.622 100 100 minus 0.0 737 locus_5 3513 3956 True True True False False False True False False False False +5 0 543 23855 0 542 3501 2959 543 0 100.0 100 100 minus 0.0 1003 locus_6 2959 3501 True True True False False False True False False False False +6 0 606 23855 0 605 2947 2342 606 15 97.525 100 100 minus 0.0 1037 locus_7 2342 2947 True True True False False False True False False False False +7 0 642 23855 0 641 2330 1689 642 0 100.0 100 100 minus 0.0 1186 locus_8 1689 2330 True True True False False False True False False False False +8 0 684 23855 0 683 1677 994 684 0 100.0 100 100 minus 0.0 1264 locus_9 994 1677 True True True False False False True False False False False diff --git a/tests/test_data/outputs/extract/G11/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G11/processed.extracted.seqs.fasta new file mode 100755 index 0000000..2197d22 --- /dev/null +++ b/tests/test_data/outputs/extract/G11/processed.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_12:11:0:4 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G11/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G11/raw.extracted.seqs.fasta new file mode 100755 index 0000000..2197d22 --- /dev/null +++ b/tests/test_data/outputs/extract/G11/raw.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_12:11:0:4 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:5 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G11/seq_data.txt b/tests/test_data/outputs/extract/G11/seq_data.txt new file mode 100755 index 0000000..5b3ccc3 --- /dev/null +++ b/tests/test_data/outputs/extract/G11/seq_data.txt @@ -0,0 +1,22 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 23742 23844 23843 23742 100.0 100 189 True False True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 22969 23731 23730 22969 100.0 100 1408 True False True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 22100 22958 22957 22100 97.786 100 1480 True False True False False False False False False 
gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 11 983 982 11 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_12 11 
972 21117 22089 22088 21117 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +5 0 locus_13 12 1098 20008 21106 21105 20008 100.0 100 2028 True False True False False False False False False 
atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +6 0 locus_14 13 1281 18716 19997 19996 18716 99.141 100 2305 True False True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +7 0 locus_15 14 1434 17271 18705 18704 17271 100.0 100 2649 True False True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +8 0 locus_16 15 1464 15796 17260 17259 15796 98.975 100 2621 True False True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +9 0 locus_17 16 1836 13949 15785 15784 13949 100.0 100 3391 True False True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +10 0 locus_18 17 1914 12024 13938 13937 12024 100.0 100 3535 True False True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +11 0 locus_19 18 2037 9976 12013 12012 9976 99.215 100 3674 
True False True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaatta
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +12 0 locus_2 1 285 9680 9965 9964 9680 94.035 100 433 True False True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +13 0 locus_20 19 4935 4734 9669 9668 4734 100.0 100 9114 True False True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggca
tgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctg
cccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +14 0 locus_3 2 327 4396 4723 4722 4396 100.0 100 604 True False True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +15 0 locus_4 3 417 3968 4385 4384 3968 97.362 100 710 True False True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +16 0 locus_5 4 444 3513 3957 3956 3513 96.622 100 737 True False True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +17 0 locus_6 5 543 2959 3502 3501 2959 100.0 100 1003 True False True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +18 0 locus_7 6 606 2342 2948 2947 2342 97.525 100 1037 True False True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +19 0 locus_8 7 642 1689 2331 2330 1689 100.0 100 1186 True False True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +20 0 locus_9 8 684 994 1678 1677 994 100.0 100 1264 True False True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G12/blast/hsps.txt b/tests/test_data/outputs/extract/G12/blast/hsps.txt new file mode 100755 index 0000000..cc33ab4 --- /dev/null +++ b/tests/test_data/outputs/extract/G12/blast/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 23855 1 102 23844 23743 102 0 100.000 100 100 minus 1.92e-51 189 +1 0 285 23855 1 285 9965 9681 285 0 100.000 100 100 minus 1.10e-152 527 +2 0 327 23855 1 327 4723 4397 327 0 100.000 100 100 minus 5.70e-176 604 +3 0 417 23855 1 417 4385 3969 417 0 100.000 100 
100 minus 0.0 771 +4 0 444 23855 1 444 3957 3514 444 0 100.000 100 100 minus 0.0 821 +5 0 543 23855 1 543 3502 2960 543 0 100.000 100 100 minus 0.0 1003 +6 0 606 23855 1 606 2948 2343 606 0 100.000 100 100 minus 0.0 1120 +7 0 642 23855 1 642 2331 1690 642 0 100.000 100 100 minus 0.0 1186 +8 0 684 23855 1 684 1678 995 684 0 100.000 100 100 minus 0.0 1264 +9 0 762 23855 1 762 23731 22970 762 0 100.000 100 100 minus 0.0 1408 +10 0 858 23855 1 858 22958 22101 858 0 100.000 100 100 minus 0.0 1585 +11 0 972 23855 1 972 983 12 972 0 100.000 100 100 minus 0.0 1796 +11 0 972 23855 1 972 22089 21118 972 0 100.000 100 100 minus 0.0 1796 +12 0 1098 23855 1 1098 21106 20009 1098 0 100.000 100 100 minus 0.0 2028 +13 0 1281 23855 1 1281 19997 18717 1281 0 100.000 100 100 minus 0.0 2366 +14 0 1434 23855 1 1434 18705 17272 1434 0 100.000 100 100 minus 0.0 2649 +15 0 1464 23855 1 1464 17260 15797 1464 0 100.000 100 100 minus 0.0 2704 +16 0 1836 23855 1 1836 15785 13950 1836 0 100.000 100 100 minus 0.0 3391 +17 0 1914 23855 1 1914 13938 12025 1914 0 100.000 100 100 minus 0.0 3535 +18 0 2037 23855 1 2037 12013 9977 2037 0 100.000 100 100 minus 0.0 3762 +19 0 4935 23855 1 4935 9669 4735 4935 0 100.000 100 100 minus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta new file mode 100755 index 0000000..826005f --- /dev/null +++ b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnnctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatnnnnnnnnnnntcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccct
cccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatnnnnnnnnnnnctagccgggaacaaattcaccgtctaaaaggagttttccgttttccgcatacataacgagcgcattaagatactccggatcaaacttcacgccgcgcttattttttacaatatacgtcaaacaatgattatgggttaaaatgactatatttttaatctgcgaatttttcagtaacgtattgattgaagaataaataccgctgccgcaatccatcatttttttaaccgccctacgcgacctgcctgccgaaaaccaggttgccgactggatggcgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctggcaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctacaggcatgttgtgagtccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacnnnnnnnnnnnttaaaccgcttgttcaaggctgaaactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatnnnnnnnnnnnttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgtgttgctcttcaacttctttcgaacgcatgtattgtggaagttcctggaagaaggtaagcgcctgttctttggtttgcttatatttttcgcaaaaaatgcgtgagctgattgcgctattttttgatgcggtattatcagcgatctggtttgataatgatttattcttcgcaaggtctgaaggcacatacggaagtgactgacactcatcaataccatttgcgttggccagttgctctttctgagcgctaggttgctgtaccggtttgctcacggaggaacggagaggcacctgggcacagccgatcagtaaaaagacaggatgacagctatcaaattttttcatnnnnnnnnnnnttagtgcgcttttacccgcctgaaccagtaa
ttttccatcttcgttatccatttccactttttatttttcggtattacgccagccctaagtaattgcagctgtcggttgtaggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtcgttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactctacgttcagtttcacacatttttccccgcaggcttctactggcggcaatattgtcacggtataaggcgggttatctgcaggggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggaggcagaaaatagcgcgagcaaataaaaaggtattagtttcagnnnnnnnnnnnttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatnnnnnnnnnnnttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgctctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatct
gtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattcaatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcct
ttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgaccggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatnnnnnnnnnnnttagccctgatgcggcaacaattccggttggacttgtaccggtggcttgttgctggtcagcgcggcttcatcagcctgaatggtgccggaatttgcggcacatacgccttcatgcgtgtggatgatgtgctgatgctgcgcattcacatcctcgcccattgccgcgatatgcgtgctttccgtaccgccactgttagtcgcccaggggatcacggtatcgctggcaaatcccatgccggaagtcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatnnnnnnnnnnnttagatattccgtaaagaagcaa
aaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggaaatctcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgtgcaatagtttccgcgtcgccgtcgaactgccagtcattgctggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcccgcataaagaacaggatctcgccgagcttgccggtcttctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgagctctgcgctgaccccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcagaggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgtacacttttacgcctgcgttgcgcatgttatccagtttttgctgacaaacttccgtaccgccctgtatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtcaagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacgaccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgactgtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcgatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgt
ggtagagaattcatnnnnnnnnnnnctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccagatgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatnnnnnnnnnnnttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgcc
gttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcgaccaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatnnnnnnnnnnnttattcctctttctgtgtgggatgctgtcggccagaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtgggcta
tgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgcagataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttaggctcgtgcggtatttgttataacgttcgtgctcgcttttgagcagttgattaaggttgcgcacaaggctgatcagcttacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgacagcgcatcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagccttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaacccttccggttgagtgcttttaatcagccaggggatgttgcgctgcgtccataataatttgcccgtttcatcgtaaatcaccgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgcagattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcacgagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatnnnnnnnnnnntcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaag
gcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacnnnnnnnnnnnctagatcacgtattcgatcaacgctggttcttgtttacagaagcgacgccagtcgacaatcggcattcgtacctgcggactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgcggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggaattcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagctgcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtctcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggcttcgagctgggtttcacccggaccgcgcgaaccaatcccgcctttccgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgctgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcgtaatagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaann
nnnnnnnnntcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccagggagaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatnnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactg
cccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnntcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccaggcgcagcgtatgttcggtggactcgcgcatcagcaggccgtaaatccatccgccaaccatggaaccgcctgcgccgacggtattttccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccgcagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgggcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctataagaccggcgactaactctacacggctagtatcaacgataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgcttacgcagaccatatcgaactgacccagccagctcaggaagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccgggaaagttgaagttggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcacattggcaacgttaatgcctttgcccgccgcgtgcagacccggggttttcaccaggttcacnnnnnnnnnnnttaccattgcgtgccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatnnnnnnnnnnnttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.ndb 
new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(_vt9+vdbW;Cp?ST|L5M{(H>~`Ywr(nmnrAQuY-8B(-FP1SW8)uZFHGU2L1;2 z^3(p1yYcNGrqh)t6ua+&C{p2eU+wVdiwpaV(t*IN2>B3IUGOHRlE)L&AK}6N+5S9h zds|e?AHiP)ekMA#f_BLoS6;m$MAGK0d6$Tc=4sVjv#BVxjXu&Do^Lxrn-;SG05@WHbH3iD=Fo2n6nq`RdH|JTz*vzti=o-CUTj*|% zV`FaW?WR9*Q|h7X|7qEA{`*&AhjXmu7}<#a-&$gbq1YW0+!SH<{#JS8TA!J{vALeg z(#RUI=Rs*@6ROuDe^)@q@6}bWyXxCrC=u+SnIFoL>wTh$<(Vl-qoZTV>yNLEF01_* zvcC(u(N6jXM>_V~?g+RPwugJ?cL*nN3^O=4sv;BhBem`SnJ`#GIlnrdTt?T;HX9#1 z;88Mlf9~DWr6sB~Xn#bc?#`-#L*7Qof#s%^pLgueSQDpS`JzV8AR6Q)#q5n+SqPa3jsXPE9ZBuQ;p)rf^iyx_`;&wY~h<*BXfGF!?e~4z=?z31m?01I(r7xURZWLBvr30|ZQ#|WHqwut%G7h|4>k$a zwx63FHHHPC-sJQUo29B?&NV9I=9ZjPmOFno ze`#OUJ^7C&q&b=}Iw@Is$1IHdn+tm`$amDf`(i#X$L`x3Gx(tPV)Ejp(E}|s4*p=5 zFNU&jlwmp>y)gOI(3S)6s`+e|7O&O4(g!Z>(3Vnn&PB6Fai5NnVW2BE)urQY>0-~9 zAWavJddd8|ia3T1(r20U`5%d;kyWIFS7=WM7d8o1`QqF6$O#A<=wO9!h)+Hel)jm6 zYl6VYN*A0h3$uudGBo9>(_Es<6wk>CZE7fc}z& z<13-Fa0hk~PZAbe@mb(1jJlv7)cm$;n=L9S+@9U19d+J`rh)F&yl?r=@rRobww)Q4xpFQldkLTj9}##s&EDdt-dcuk>QTlZJ<)=p`dbG@uhj zuP@3f*Z#NS<7><*xS`E3qKtxj&h?KCDr9t23zB7@ZAYNXU zq;=r@2LI2L_D9K4TU5BI;%QS}@jEhz^5LJFP54Lv_?dp;tHD#R*icm6J2GxQLuiHHGI4ITU@@W< zo}7YBVq{h3#m8uDZ>jOToGbLYhBpAi`c8W&U9>8qUAVj=i$|IP%%tqg!x9gI#s!gf z+zvRbAo~r<*w_tmA+b05L(1Dz5CSqy@XBX?x{;FUxR~Oy>wPUUCyqwqpJa*WD8MP( z3LG5H9%o61VwaBM7|6n);CQ`p4O#6X>T^MjoH-JV8(l@O6|nJ~k^!@3e}hJ9v2ZUR z`$_i(B1J8u(3Y8NoFf&Ysr$oJ!@4~9qi!Z+=UqgKL}2kDa)ANwF0ar5r%no@#^b1< zRXozLf;XRvl5yi=O4rN( zBHWcXac}XS?r;kC{@b|s^Z)@wJ=Q292ulz#mK?C2Ias}bX!3NV;kmp#RW%8w(1jK z0-{XY64P^V{ENq}qwtSL4v%($JDfm%3c3hTncicIVp&BH{(j+#v~S(!cgA5BAe69R zzsg+Kjip-C_>#L%?5YUz zeG3Nhqg@}WnWTeFjiLXb8JJ|RH?DCz^<}$I?1YBY_46a7u5Zo*Z5O@PYlogWD-%4k zqF2fdB}Bi}cZi5uI~fCT?gb_|bcYQrpBUtk;s*#Q!zL#woa;Q{NqFplDo8l*6U!Eb{-E`+!)aVPwHO;Mp~;3EU|xV*NVL~^tw>;bb> 
z49+KL@x@b4;RZ#8ebeOov(R|h5AN>A47#-kUx!4*$8y7=oPDFKdx@*MJL^jb?hQE^ zNYKd`)EWewWZ-d83TSQn_zdYZwC(=fLCF@k7Uy+W*4`)@O$q)wK|TeXE8Eq0s-NTE zPehjR^c@1@I&b(>K(Ef*TW5fG!A%f=RSCx+p(4Xa9q2wC4Qa5fsUPga>Ci!opQND z`t#|P*$nuRX7RlZ@$eFUjA7~%%X6|A{#XC_;SZ`e!_n2q?EJn$xFo3d$Bzpr?*?MCy|)`9M;#_WLc?elT*TC zj{jtU%BDNUyt`;P+`H#!`blE^IxoK-()?Xe%i&AlY*fzMY)N>Cwq6ubl6SU@xllet zY*c1PfEedL8l6B)ns2o;fH(KsGH^TTzl{qhF1B=A(%?d{h)ldxiod)o^Qq<@Z{X}? zbED&sK;hS_sPUf~IVdd;Ov+wAMFOB=wpJUByzWl;D}!OKi)STzk~beC5XQ^x=L!<# z4hz0{Din=gz_Iy<5jXH@akkhmyrbzd(0R8u<^df7eyc`D#3|~{ra0UJ(6G=tcTVX% z+{b&-8|fd%&Z^oZHS`P3;D@p<9xQW50_P`-*F1yMF46qRvTRveGjAk1wD#iU29?!k z9Efj-`3ct+{3a2|t4Le*TqMPn$+sdhDBF0MF)<(^>Vn_A%{nZOg4P` zj>d?Wh$kM%Yb9Fz1mg?&=3luvic;mPE7D&@jQyP*7|?A^}Pm4V%tE=j$fJUS7^<}Wmhd@-PW`@sBA06rT* zqJ=RxFm{_2lNzY>3oYVm9lY>3+8B5Ei|0Os6eRpWeX^xGHpwP17WZu^7r}*BXzVNBj=C70I|UUfiXBgHnqw z<`t2_61A%hg7Cc1DGZ2}0z`5EMmD7~>f-#@eO;H51j8rq52wg{oA#4HJ9#c(C=Q3; zIlJnuwe7mB-b(ma0{Jjls=N-oA3QG}f||kljf$lJiC1^w8hl}qcNxzD6Ai9|jgumT z^ldCdtp`L*J{r9wmr}7dEZn59B`na5iAiS1pMwB@2A7%4O04cm`3_;~ujq~EgHRN8 zpid+usmaSAkDpD4FNTnU#%AO^E=RzqZSAZjXB4IO{=HkW3w2T=3{H{n!VaH{yXi#h zPczT9*dskDaicVCN`UD>y0?p4u%?3f>MPOUHOm>guo6SE)8*s99f*{Loq3Yy39>mr z06Zy!5(j1yv;^Me>3-v!O`^bGZ6G;M1ll=8dcO^)ilG40Pon5yoSC) z?%dmybB}V*(Q)uiA?6S^%j&Z@e}YYB{ut;uL!;W-1{Zx!9{#;s5}P6D`Gs(yRs8W( z(Jj`V!AOHiBh1i!>X~pai#^F>&LI}gy+>xbj$2=6 z39SipC%h%po0HAL^Co_zPH4AMFq!QXK9Za|V6mqek0#HTOl6)PT~yU?V{Z9vQj3NX zX|%qu92oyQqqzIy<(A>%k|plaOdVuWnPb<*x=-G3qZUr&%jpTtgvBr>DmRGz5M&wx3ieE>cL+IYS`F;an=dbh z<23G?&{aW|84Yihdc3t8c0SfA8~}fnJM%8inWKf_$)F|MRUjT5A5_nV51}@ z@Q_K?1F>eLIel-h8sFV{7qnl>uU41~kIDS!3b9r-L?MkOH~A?m*z=>S!9e&5l^p;@ z+biSmL)CgD_G+qAgvU(~CD8|H!?u9zL$g`y_t6ymLkV0&)PC?J;bWP%W`-srR@fLm zJnxe1j?@TWD9huj_a?OIT1C2`YD)F7H~ZqiRzwm#K&x46O6+)kH@kvf*`Ky~>Ud05 zvA-Ub@M2q=29UUkYH4sRUPym%SLyIc_vdF|lb1wjs|b(IA=5erdKpM|V~nVQ|0Fq{ zMwp0GNg;Ys{#a6}!LC+a$q0mEL0jH~MVY5RMr+H!#ZdWUv+R2FZX?-6ys+MLfcMav zdVYLu=5mzas~VRO4ADqn3}xRj@m7>qhJzU4Y%7MMGAtD;UXb|l|Dlju3{ zV9f 
z3Ef&fMu{VXU5uWc_iVcJen_$I4*wV*NA!>Y>E^p?A}Sk!J- zDXU^hL6sM>ds{d?DT(%iq&?S+qPzUHG8;>xl3THHB!QU=sHQgSf9&VQ0LVLl8}ldb zyxD6z=RbDE#NL8ApUAdi2nO592V=Zqf!0_`<&B=n2``_`Bn2Q-n{#43z2&oytRI`*2B)y)Qh39f^%Qm=sm*VeUMPvx ztO?De20WNE8@QGmRzT*tQ0kq%9x77|i^25%hZ$)2bEyU5geT%hHy(a_foD?pkQ}#n zN;qVnnR_M_Eqn?oi$Cse{(vl0irk_oaC4E<^N0%{5)dyw*B%RD6NuS|ERaqLrYy0PY1aG!-}Ee@WMcZR zmXIoh=S)-CLtYI)kf8*`ipa?;z~B1fYU4Ne=JqF%a%$?9#t3-)ejo^W4m9GmewwM0j*#d4h+~K|4LYYp5xhb>FBYhx$BhZbh0`=_}!(kXYzi)CcP_lZ6GiOzIkkpvp97HHO>%mv<#nrF{Hd1sOQDY3-%rNJ_2C zvx);jh|&5(pHHwu+RHB$#HSV@u3OjU7x?Q}z5n<1U-PplRo{&HkM&d}PpaTqY>#Fb zga#EW$3YZbmoFp4i1rPAI1|%Hh|mckc?Vtq72yoeewHr((Tbq|JH@_fBbRoJbQq^sd=IwyW?qNhZ~0f#TzIB6lI#7M|%GD+3?F6#|fBQq*ms+ZZtG+Y5rl@^dL1B z27@7%f5tLQ=j}2Kp1Taw+p-K>pS}z;tX+l~CoaQ&v08?0IJFG3?OcYTJ}<-m_<5NB nTZVbuT88;Bmtpu{mSKUvEyIAHT^N2DM*6w?^X@Y2sLlTaOV*_q literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G12/filtered.hsps.txt b/tests/test_data/outputs/extract/G12/filtered.hsps.txt new file mode 100755 index 0000000..8463ce6 --- /dev/null +++ 
b/tests/test_data/outputs/extract/G12/filtered.hsps.txt @@ -0,0 +1,24 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 23855 0 101 23843 23742 102 0 100.0 100 100 minus 1.92e-51 189 locus_1 23742 23843 True True True False False False True False False False False +9 0 762 23855 0 761 23730 22969 762 0 100.0 100 100 minus 0.0 1408 locus_10 22969 23730 True True True False False False True False False False False +10 0 858 23855 0 857 22957 22100 858 0 100.0 100 100 minus 0.0 1585 locus_11 22100 22957 True True True False False False True False False False False +11 0 972 23855 0 971 982 11 972 0 100.0 100 100 minus 0.0 1796 locus_12 11 982 True True True False False False True False False False False +11 0 972 23855 0 971 22088 21117 972 0 100.0 100 100 minus 0.0 1796 locus_12 21117 22088 True True True False False False True False False False False +11 0 972 23855 0 971 982 11 972 0 100.0 100 100 minus 0.0 1796 locus_12 11 982 True True True False False False True False False False False +11 0 972 23855 0 971 22088 21117 972 0 100.0 100 100 minus 0.0 1796 locus_12 21117 22088 True True True False False False True False False False False +12 0 1098 23855 0 1097 21105 20008 1098 0 100.0 100 100 minus 0.0 2028 locus_13 20008 21105 True True True False False False True False False False False +13 0 1281 23855 0 1280 19996 18716 1281 0 100.0 100 100 minus 0.0 2366 locus_14 18716 19996 True True True False False False True False False False False +14 0 1434 23855 0 1433 18704 17271 1434 0 100.0 100 100 minus 0.0 2649 locus_15 17271 18704 True True True False False False True False False False False +15 0 1464 23855 0 1463 17259 15796 1464 0 100.0 100 100 minus 0.0 2704 locus_16 15796 17259 True True True False 
False False True False False False False +16 0 1836 23855 0 1835 15784 13949 1836 0 100.0 100 100 minus 0.0 3391 locus_17 13949 15784 True True True False False False True False False False False +17 0 1914 23855 0 1913 13937 12024 1914 0 100.0 100 100 minus 0.0 3535 locus_18 12024 13937 True True True False False False True False False False False +18 0 2037 23855 0 2036 12012 9976 2037 0 100.0 100 100 minus 0.0 3762 locus_19 9976 12012 True True True False False False True False False False False +1 0 285 23855 0 284 9964 9680 285 0 100.0 100 100 minus 1.1e-152 527 locus_2 9680 9964 True True True False False False True False False False False +19 0 4935 23855 0 4934 9668 4734 4935 0 100.0 100 100 minus 0.0 9114 locus_20 4734 9668 True True True False False False True False False False False +2 0 327 23855 0 326 4722 4396 327 0 100.0 100 100 minus 5.7e-176 604 locus_3 4396 4722 True True True False False False True False False False False +3 0 417 23855 0 416 4384 3968 417 0 100.0 100 100 minus 0.0 771 locus_4 3968 4384 True True True False False False True False False False False +4 0 444 23855 0 443 3956 3513 444 0 100.0 100 100 minus 0.0 821 locus_5 3513 3956 True True True False False False True False False False False +5 0 543 23855 0 542 3501 2959 543 0 100.0 100 100 minus 0.0 1003 locus_6 2959 3501 True True True False False False True False False False False +6 0 606 23855 0 605 2947 2342 606 0 100.0 100 100 minus 0.0 1120 locus_7 2342 2947 True True True False False False True False False False False +7 0 642 23855 0 641 2330 1689 642 0 100.0 100 100 minus 0.0 1186 locus_8 1689 2330 True True True False False False True False False False False +8 0 684 23855 0 683 1677 994 684 0 100.0 100 100 minus 0.0 1264 locus_9 994 1677 True True True False False False True False False False False diff --git a/tests/test_data/outputs/extract/G12/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G12/processed.extracted.seqs.fasta new file mode 100755 
index 0000000..ad2a6e7 --- /dev/null +++ b/tests/test_data/outputs/extract/G12/processed.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_12:11:0:4 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G12/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G12/raw.extracted.seqs.fasta new file mode 100755 index 0000000..ad2a6e7 --- /dev/null +++ b/tests/test_data/outputs/extract/G12/raw.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_12:11:0:4 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:5 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G12/seq_data.txt b/tests/test_data/outputs/extract/G12/seq_data.txt new file mode 100755 index 0000000..6199715 --- /dev/null +++ b/tests/test_data/outputs/extract/G12/seq_data.txt @@ -0,0 +1,22 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 23742 23844 23843 23742 100.0 100 189 True False True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 22969 23731 23730 22969 100.0 100 1408 True False True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 22100 22958 22957 22100 100.0 100 1585 True False True False False False False False False 
gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 11 983 982 11 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_12 11 
972 21117 22089 22088 21117 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +5 0 locus_13 12 1098 20008 21106 21105 20008 100.0 100 2028 True False True False False False False False False 
atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +6 0 locus_14 13 1281 18716 19997 19996 18716 100.0 100 2366 True False True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +7 0 locus_15 14 1434 17271 18705 18704 17271 100.0 100 2649 True False True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +8 0 locus_16 15 1464 15796 17260 17259 15796 100.0 100 2704 True False True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +9 0 locus_17 16 1836 13949 15785 15784 13949 100.0 100 3391 True False True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +10 0 locus_18 17 1914 12024 13938 13937 12024 100.0 100 3535 True False True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +11 0 locus_19 18 2037 9976 12013 12012 9976 100.0 100 3762 
True False True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +12 0 locus_2 1 285 9680 9965 9964 9680 100.0 100 527 True False True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +13 0 locus_20 19 4935 4734 9669 9668 4734 100.0 100 9114 True False True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcat
gtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgc
ccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +14 0 locus_3 2 327 4396 4723 4722 4396 100.0 100 604 True False True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +15 0 locus_4 3 417 3968 4385 4384 3968 100.0 100 771 True False True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +16 0 locus_5 4 444 3513 3957 3956 3513 100.0 100 821 True False True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +17 0 locus_6 5 543 2959 3502 3501 2959 100.0 100 1003 True False True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +18 0 locus_7 6 606 2342 2948 2947 2342 100.0 100 1120 True False True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +19 0 locus_8 7 642 1689 2331 2330 1689 100.0 100 1186 True False True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +20 0 locus_9 8 684 994 1678 1677 994 100.0 100 1264 True False True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G13/blast/hsps.txt b/tests/test_data/outputs/extract/G13/blast/hsps.txt new file mode 100755 index 0000000..8a42dd7 --- /dev/null +++ b/tests/test_data/outputs/extract/G13/blast/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 23981 1 102 12 113 102 0 100.000 100 100 plus 1.93e-51 189 +1 0 285 23981 1 285 13891 14175 285 17 94.035 100 100 plus 2.48e-124 433 +2 0 327 23981 1 327 19133 19459 327 0 100.000 100 100 plus 5.73e-176 604 +3 0 417 23981 1 417 19471 19887 417 11 97.362 100 100 
plus 0.0 710 +4 0 444 23981 1 444 19899 20342 444 15 96.622 100 100 plus 0.0 737 +5 0 543 23981 1 543 20354 20896 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 23981 1 606 20908 21513 606 15 97.525 100 100 plus 0.0 1037 +7 0 642 23981 1 642 21525 22166 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 23981 1 684 22178 22861 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 23981 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 23981 1 858 898 1755 858 19 97.786 100 100 plus 0.0 1480 +11 0 972 23981 1 972 1767 2738 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 23981 1 1098 2750 3847 1098 0 100.000 100 100 plus 0.0 2028 +12 0 1098 23981 1 1098 22873 23970 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 23981 1 1281 3859 5139 1281 11 99.141 100 100 plus 0.0 2305 +14 0 1434 23981 1 1434 5151 6584 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 23981 1 1464 6596 8059 1464 15 98.975 100 100 plus 0.0 2621 +16 0 1836 23981 1 1836 8071 9906 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 23981 1 1914 9918 11831 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 23981 1 2037 11843 13879 2037 16 99.215 100 100 plus 0.0 3674 +19 0 4935 23981 1 4935 14187 19121 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta new file mode 100755 index 0000000..d80ce63 --- /dev/null +++ b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaa
gcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaannnnnnnnnnnatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgc
tcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnn diff --git 
a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I( literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..fff3efe --- /dev/null +++ b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G13/blast_db/contigs.fasta", + "number-of-letters": 23981, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:11:00", + "number-of-volumes": 1, + "bytes-total": 43197, + "bytes-to-cache": 6241, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..1e96f4350d71e7775a0b179c413b048541b21686 GIT binary patch literal 6089 zcmWNTdpwhi1IAysF;N}4jdP?h>B?~(ayd@2IaKP%=A`XxduN6npE|Nva#xC38*A^T zrSOX8QY3cdQiqP;NlGQ7SgsqTB;^*r^T+r3D)9no%;s z7}`3Qn&(M%OE&VnE;RLt>zQV6Jh?Fr?9W?a_t$rb?`A@xv8LAxvShlLT2kHcredM& 
z6YI~j7N_vuV-_5{rgLEG%J(F9(@(ToO#!hfks}ix4U2AXddbgl z#mJm|9+j$zF=JZh6P3}O1qwv&;n4La4#O76eRW_XFx0EI&#EO$awTI{<$(bJOrhbS^zf4 zhV1iCQ~R&IZTgdT{w8MaBjx2N1U1~xo8dHwyXTndrn@DQXB5?235Or{KJ<|L;=guH z*iQ=9S@dLNMU4ho0qU0cR^E!W4^LCtG%ZfTm=;j`mfPvsrf<@!Xa{(<7Ik7rW+_AC z0McqH+a1Z|ixxbWqZe11oH6)P82J~51fn3VM5(wh%+~9ccf`<%edW9{B!1wmKf6Pj z`p`LD&++!tU(Y5@O&E{Z$)*+}#_tI~P6z*Q%go=~WVj{%!^E{P7Bm#PKHuvD`lSVi z1t?CtCJcK67l4<=DxWrf0j`Q#UH?sqgwU~!%GM*PeoSv);&940CTEtyUJgN2Hvofm zOepLP(N+{}w zwsLnGgw=Vm3h#2S1Fri_@JfFpLkvO%&Lu_voIgPPlJ;_Zx6U8Ic*Q|^P~ZuyBad{G-Eev<}C9JW<`WpoO&a~ zNq+8UYU;+q8$8G<@innac{!tjj|Szi&>J^%e(86@K$z=&Pyw#~efX)ZaBWD;Iketeyj2Pm#9oxZJCe(YV}#g?&bF8c`Den7tpiuqVkKo z$kP`xeimUfCY5t4zE0fzdn4^c!*!VDhlDY1GYTqmT^mnOXo5q;n4ir)AQ_#lkfXnB zQ+M_5(S$Z`$w|`otVd;(Tb! zZW$9c@x_m1KfhjA==tTTw*9%IE%QwE2$9u@-bfhhlnL7})514weOIIZfWNp&jE>i& zsQIkemBr_R(HBKKL^>+Til{keo=PK)xc}3;cFYpKeKe!4m9Qvg4#(IsYwSvpp)IW%BfTMj28rhQ5QChqEVv+gs%|%B(&_e=#k2cJ5eOy#qo7R zn}Zo(^%JHIzIPI%`qi>)$j1xI9aNfrMe@1b1(Oz9t+zyOz7Geu|iQdo_MEi9mdYjw9N0O zhF?Pb1;cj->Dc0)YTKxLOT~L^cZBQ`iI!w1VtHg$rWFBBgq z_9J&$prA5COj%VN&22#7r|}?0O5Y}I(cbF&D){+0LZXcZL_FibMEqPu76Z;9#FE~Y z%)v1(Lrnfv`jy3MoG7$ybJXESfkY5$z7HGILyA;dbFUMi!LyL24v`(`aRRfcpQ1!vcYcI4#||~>SZmk^p825mKnts<#b>Vta?IPy zi-~EA@nDoZ@o~D=O)Ud0e5?JHH23Q`&2Zzl zQeShW1TEN^QB~(jkY`Lv4EHHWW_w4;30r)|4gJp&Xl^e}9 z`9UUb@9p6PVoHU<9U<}=s_{ZzZae0cbB#)RzurT%LhiR}G_cFile9E3-7ShyGK3SS*?C=+}8U`;UkyjL|AUzbanm?`C60y8P2@ zavON|=`1d!oxi7a0dK?Xbr*nPPry6OzdoQrT|u})xbja&(i`x!88Pq+fET6WyHNP9UqLsvZ@TO<^^S*4+Ctt1?z zMas-80wW&|Pl^<0%ra@S%_wYFE10@!UaR8}BK$|0zt1)H>vg#Ictp_eO;$X8HC@-e2*Sm*8N+dh7LN&zkgr&7Y|$|pzyr&byYVBzR8*UuH7J?j0ZR^Ul;`5|>v zmDNVmL%9yzW?OUMvp9ryh9Of{-j#VQ!Kfd>Mc zut-kymOOt*^j7wZq0y}*-+T<1v~FUD`a&AYiLtMFO#FbU6fwha=p#QWlL>TY3=cMa? 
zRyw;1LRSr-OBU5N5N)FDZ6tOq?w#knrm6Dr7b!fZzF<@N%dUCoIa%+$(v3O`>6PbR zNtZ7uAc+LNSN>+_UD;mVD-tu!5cqNh5z%Tmx-}P$OL8CyWHjvm;qrpnO@$4j{e|jA zVK$5I!fmHo?FHy*w!w?$FP4?ay-K;aQWl;V9FaVzJXNkFKRMjtGK~0$CMNX+wVi?p zR)OiBX)y92Y}|Ew=8X0{Q;zM({H`>DcwDMCNra_-?((!K{)O=eQJ0Afh9t8>kXN`d zf1JACnfWM%DfzTUz|yCqd8zJruR>+BH4if3=`;)I)Set&UT!IprFFYuYhvY2@-g=$_vcj0k`JCS-Z29v}t?N}Dtripooa~U^R3!1V3^4FbE=amM4jxl zzQCZ}a}$)uN0&OVi&ieiZUez7_)JUagjl8&j z=4_A7FL8xU?b*YOi_8mw#ls>!x`mk1GxPhJX#W#&e53n$!zBAItNWT>A%JzL8-qD6 zhCHLcAqK+{5rIA9{zxC~!gB#Cs_618vY{>^gJE=m^i!fJKY8Rye9W9FzfWub0gu@- z#5ldx>e4xn8R*3OX3qEu1MwgotsZ}db!Jw4cOZE&6wUX$G$m!!P(Q<|7%G=`Oj&07 zYndYyJiyF}*7Z4-S|rFe9qGgsU*}4CIy%F( z(o(->S>TJUDCc}hp@JoReq_k;%JbS{mW5{EFH2>fqS8eiKg2fQj2;gUj1Y1&HD1f1oqyc6Vq+3L-B% z&L{zp!nSAv4||&7opXW=+fxu`4m6ds9`NdutCHaRXZ*}F`QigWjBmA|P1_@feSzEu zEphH8QP_BBCa~3&-h!4IU=DoY-FpI{ypKq@FN&~(sfEIuiYe2ZTG)*9!Upv<)YBua zUg_)oY&6LYifd~qlwUIa_vM?8X+w(PRGMe>aGMj0y*OgXlo)`c8dK7v52)UDQh$fa zdM~6-*(c3P(EahDFrjtzcz51ld_OI+^T$ERITfQBm1Y6Ldv5yJuv3&{nq@f<4KWvi zgsfSARYe{efqSjsy6u(Q0yzV3Fbfrsd*h{#@1<~2h0#yJBTv2*Ve(%`kVL4KSN^4s z;5-ovU=-!+u+gf$bvoOwZ{Ws8#@X4Gpw2t7mGeYZ4~m{$F)O$W`fEg!If zRF%2gr1?sas}Zf)mqJo`BC(OsEyULcZONGqv*)1_Ia#6))l(hM)wf+Jjnry~hXXncz{Gi?G0dKwSC0ULj0)hl~Uy+KTL zxZ2_Vmj-}gF`4(8>T40!u(*?eNX2Z)nwW~1pTvlxHRv)OT!4#$PoN217y-FE$yGxr z4pn{rBQt%n){|DI!e1{~Rv?;wjVp9GxIk#1i38W6n4R2jB53M*!{K35)9@fhrVoQ& zC~tvuukFzm`m_8YdS&zF1M@@uw_#~Un>s^eNT3|T-pk~~Sep!jct^fki`jg)T+KOb z(cWFQB!y$P+n2oyvlT#oaSD9k+|JFM59ksd;c0h-u|<)qwC#zFnNV#u4vU$8i6lD( z2v7S2Gg#gvD%JyF!l2xl=^p+|f(<|3ky*P`Jnf-X6}|m<1?mchTkz@-81p+Julp|j zM&UHy56B9Irz#}S zkN67L)5G-9s4)Mw@q+X$1L!)n8tbQcpSW~~nLZ=ESBtu%rqaBGTt?(;Q%~fdx zxmRTI>2?!0uy@GK!0=fYF5aPZ|L!N5{gnLLS!pD@!J`r%nFGC!f?*jZCa2SW(w|n7 zNxco9mdE%rMWUOWv02PnyW`tD{nBF_hd6ZrZp+)u7bTwxzQ{u*-=cSIN!lJX@W5)| z?dr$3EsIwSbdfa+e1%#0w7%50o}V_@{w~5g#G#G^- z(*EdN@0|iqeVe#Ks<5!pxWe0(G^nl8ILLPlKj{>@xmOtWkHh3mU{+~``nx`TG5!Zr ze|U8%RDbiuTR`Pb6VEZ*HGSlX{JY|1oyVp|oK0J&p;#xf7O*o-)sf~2%+2)i>(G}w 
zG*WERv{t*{FT7uWlwZ*t)_nZlZj+>43jNrMHg&kXt7=*IP)?7W_>9^!nU|+(bNl9JiM}cz? zd#m)Aa@)Awyn(`uOa5q<;|MWi^IW~l`SsjHclyTqYu~QIUlZGLbHW_TJ6Dx;Llf-Ke0ZF8*WUMtscj$e$>z? z{wDg0RIzKl71PTnHit&#$}Cn-I#xELqQEmMMf?^KI12%g4FtPRX*8&18AEj-eWdhE7 zeCLiD`m9+xj{}Y{qHN2LK6Ms%wdLJpngwuXeOt|@58=g$n>mAW9-XQf_owzD7N*qAdJvD*V zJmg7@n@ADpo@nAwPpH}n3da17N<{}T+(;?nI}~M>lk8sFKrd;m0KhOTC|b)Gj1mmj z3r0Gk6G4W13B=@`2YYBo--?b2AzU0zbO=0BW4^bBV2c%8aMRzYu|-oWvbC|-YIEU- zA{^dlMO_kZ)i%I1@E9Em2)JY2p2fo&uJk>xb!&}yNqDA(-qq&)Ed0c};v6o<&pLSj ztzHI1KhZg!fAgM~)m4L!xAv#>avdl7$waxl}}p{5J2gbz<+^Yubg%CPS>*Q2${=7s;Xa#rx-lzdo0-Q0f9htoO7>+jio9 z*7Mg5RUbU`EvNmQ((|}pco-GU=)4Oa%kLt9NumdTzbaSNeMwSvtj?F--HESt9{bWXvxkxX_0znt zPOGAPo>o7jIn|RdRyG0FdGEP|Q<%)E?Pw#ic!MPK(TyOJ#xEnO8Z`GcdbK8umv%~h z5ZivmpV|D7{q)MsCtJlEOqWz0s5)P4Lw#=7ny12wI~Xnc=z!lk=4)K#Q&cJ}8xnHs z4ZoiOhig7+f+U!Q`p{j$z5cKR007hF|KvXsmI1BAWdJp_4Cvok1~%y~1IAmH0Zh{} zur+xZ!2aj$bzcVD{=@y`GC;V!4E&cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G13/filtered.hsps.txt b/tests/test_data/outputs/extract/G13/filtered.hsps.txt new file mode 100755 index 0000000..8db845c --- /dev/null +++ b/tests/test_data/outputs/extract/G13/filtered.hsps.txt @@ -0,0 +1,24 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary 
is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 23981 0 101 11 112 102 0 100.0 100 100 plus 1.93e-51 189 locus_1 11 112 True True True False False False False True False False False +9 0 762 23981 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False True False False False +10 0 858 23981 0 857 897 1754 858 19 97.786 100 100 plus 0.0 1480 locus_11 897 1754 True True True False False False False True False False False +11 0 972 23981 0 971 1766 2737 972 0 100.0 100 100 plus 0.0 1796 locus_12 1766 2737 True True True False False False False True False False False +12 0 1098 23981 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +12 0 1098 23981 0 1097 22872 23969 1098 0 100.0 100 100 plus 0.0 2028 locus_13 22872 23969 True True True False False False False True False False False +12 0 1098 23981 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +12 0 1098 23981 0 1097 22872 23969 1098 0 100.0 100 100 plus 0.0 2028 locus_13 22872 23969 True True True False False False False True False False False +13 0 1281 23981 0 1280 3858 5138 1281 11 99.141 100 100 plus 0.0 2305 locus_14 3858 5138 True True True False False False False True False False False +14 0 1434 23981 0 1433 5150 6583 1434 0 100.0 100 100 plus 0.0 2649 locus_15 5150 6583 True True True False False False False True False False False +15 0 1464 23981 0 1463 6595 8058 1464 15 98.975 100 100 plus 0.0 2621 locus_16 6595 8058 True True True False False False False True False False False +16 0 1836 23981 0 1835 8070 9905 1836 0 100.0 100 100 plus 0.0 3391 locus_17 8070 9905 True True True False False False False True False False False +17 0 1914 23981 0 1913 9917 11830 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9917 11830 True True True False False False False 
True False False False +18 0 2037 23981 0 2036 11842 13878 2037 16 99.215 100 100 plus 0.0 3674 locus_19 11842 13878 True True True False False False False True False False False +1 0 285 23981 0 284 13890 14174 285 17 94.035 100 100 plus 2.48e-124 433 locus_2 13890 14174 True True True False False False False True False False False +19 0 4935 23981 0 4934 14186 19120 4935 0 100.0 100 100 plus 0.0 9114 locus_20 14186 19120 True True True False False False False True False False False +2 0 327 23981 0 326 19132 19458 327 0 100.0 100 100 plus 5.73e-176 604 locus_3 19132 19458 True True True False False False False True False False False +3 0 417 23981 0 416 19470 19886 417 11 97.362 100 100 plus 0.0 710 locus_4 19470 19886 True True True False False False False True False False False +4 0 444 23981 0 443 19898 20341 444 15 96.622 100 100 plus 0.0 737 locus_5 19898 20341 True True True False False False False True False False False +5 0 543 23981 0 542 20353 20895 543 0 100.0 100 100 plus 0.0 1003 locus_6 20353 20895 True True True False False False False True False False False +6 0 606 23981 0 605 20907 21512 606 15 97.525 100 100 plus 0.0 1037 locus_7 20907 21512 True True True False False False False True False False False +7 0 642 23981 0 641 21524 22165 642 0 100.0 100 100 plus 0.0 1186 locus_8 21524 22165 True True True False False False False True False False False +8 0 684 23981 0 683 22177 22860 684 0 100.0 100 100 plus 0.0 1264 locus_9 22177 22860 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G13/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G13/processed.extracted.seqs.fasta new file mode 100755 index 0000000..922c57c --- /dev/null +++ b/tests/test_data/outputs/extract/G13/processed.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_13:12:0:5 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G13/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G13/raw.extracted.seqs.fasta new file mode 100755 index 0000000..922c57c --- /dev/null +++ b/tests/test_data/outputs/extract/G13/raw.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_13:12:0:5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G13/seq_data.txt b/tests/test_data/outputs/extract/G13/seq_data.txt new file mode 100755 index 0000000..68a237b --- /dev/null +++ b/tests/test_data/outputs/extract/G13/seq_data.txt @@ -0,0 +1,22 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 100.0 100 189 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 897 1755 897 1754 97.786 100 1480 False True True False False False False False False 
gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 1766 2738 1766 2737 100.0 100 1796 False True True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_13 
12 1098 2749 3847 2749 3846 100.0 100 2028 False True True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_13 12 1098 22872 23970 22872 23969 100.0 100 2028 False True True False False False False False False 
atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +6 0 locus_14 13 1281 3858 5139 3858 5138 99.141 100 2305 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +7 0 locus_15 14 1434 5150 6584 5150 6583 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +8 0 locus_16 15 1464 6595 8059 6595 8058 98.975 100 2621 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +9 0 locus_17 16 1836 8070 9906 8070 9905 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +10 0 locus_18 17 1914 9917 11831 9917 11830 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +11 0 locus_19 18 2037 11842 13879 11842 13878 99.215 100 
3674 False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtga
aattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +12 0 locus_2 1 285 13890 14175 13890 14174 94.035 100 433 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +13 0 locus_20 19 4935 14186 19121 14186 19120 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccga
ccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgt
ccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +14 0 locus_3 2 327 19132 19459 19132 19458 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +15 0 locus_4 3 417 19470 19887 19470 19886 97.362 100 710 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +16 0 locus_5 4 444 19898 20342 19898 20341 96.622 100 737 False True True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +17 0 locus_6 5 543 20353 20896 20353 20895 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +18 0 locus_7 6 606 20907 21513 20907 21512 97.525 100 1037 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +19 0 locus_8 7 642 21524 22166 21524 22165 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +20 0 locus_9 8 684 22177 22861 22177 22860 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G14/blast/hsps.txt b/tests/test_data/outputs/extract/G14/blast/hsps.txt new file mode 100755 index 0000000..2033b79 --- /dev/null +++ b/tests/test_data/outputs/extract/G14/blast/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 23981 1 102 12 113 102 0 100.000 100 100 plus 1.93e-51 189 +1 0 285 23981 1 285 13891 14175 285 0 100.000 100 100 plus 1.10e-152 527 +2 0 327 23981 1 327 19133 19459 327 0 100.000 100 100 plus 5.73e-176 604 +3 0 417 23981 1 417 19471 19887 417 0 100.000 
100 100 plus 0.0 771 +4 0 444 23981 1 444 19899 20342 444 0 100.000 100 100 plus 0.0 821 +5 0 543 23981 1 543 20354 20896 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 23981 1 606 20908 21513 606 0 100.000 100 100 plus 0.0 1120 +7 0 642 23981 1 642 21525 22166 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 23981 1 684 22178 22861 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 23981 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 23981 1 858 898 1755 858 0 100.000 100 100 plus 0.0 1585 +11 0 972 23981 1 972 1767 2738 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 23981 1 1098 2750 3847 1098 0 100.000 100 100 plus 0.0 2028 +12 0 1098 23981 1 1098 22873 23970 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 23981 1 1281 3859 5139 1281 0 100.000 100 100 plus 0.0 2366 +14 0 1434 23981 1 1434 5151 6584 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 23981 1 1464 6596 8059 1464 0 100.000 100 100 plus 0.0 2704 +16 0 1836 23981 1 1836 8071 9906 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 23981 1 1914 9918 11831 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 23981 1 2037 11843 13879 2037 0 100.000 100 100 plus 0.0 3762 +19 0 4935 23981 1 4935 14187 19121 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta new file mode 100755 index 0000000..7af800f --- /dev/null +++ b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaa
gcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaannnnnnnnnnnatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgc
tcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnn diff --git 
a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(288|R=f>B@1PyW=FALq$h6Cv9ijXJ**(t0VhJt|{fRHr76y zmcmCgmvV_6xzwTKdy-PgC@t5GQj&6u-}&SH{PDbA&%e*}02Ycf@2E%EZ!#GL4*2E` z`JuIE{q67HEx%M$e{Ux*#qoWT>!~@np2DO@ehfsNA-=RcetUOZamY8HUaa~~S6W=$(-h5(nJUE!Q${uX!5;bK(!ts_@i_#Rjh+10H__}hj z{1fZXGiE38UgKsQ+m^Fn+SIO(f*Vm&-Bk^*QHu8@H{(yVI(0s=C5a;ygonj+w!GkF zx?rS^J`c-O#Mp5S)5)rs?m{^tbGPq#9gku2WxiUl0T}98H(=40Exy@)nLDe$HDgX| zHr*(8G>}=P&hKhJJeRCrC%dRAsFiSq^HK`c~bFvx`VqShp-q!NfLD^OozWxt4E|>KJ=?st$F0XI2?Q z?EunlUaA+xn{X?{#EU*c%$HzsF}!d?kMls5pq z4NNHP1<_XJY}X@))<>F;?7RHrc!$e0!{tTV3ZX3tg7BQ`&E&lTdsJn(Bg#I+~V|wR6RhvXD?L#uViVu>Q1~SugK!;)qQ^~ zDVAP*{>1IMYf@wQjD`6Cej1+gd3(Sm*>OjRp9c-_*xi_wryjRiJ!gfhHzy>-;Z&O; zPRcVsV`EnqUhjTRsjrc3>Wf)5d<-aq1zxz>^UJ>zhQeI#fpT!|@54`Q%uk|H!nEtm z2QN}FmmkOPpLNASK}vh$t%*|eu+OoHkLdCD?B&0JVF){{`{cZ;6LPnwRoDfya8Z^sUz8U!o#)t!+{Ys8naTkd-KO~HCn^9Pm>(YFjLgODI#{O*b0mcke2!h?%T%oh*|+IOrk2d#eqA1qi9@uTRYJWX^|< zb8X{-7M|##^ygO_avi@s<@P^!c4VEd>Bq2|(VGe5-BLm4Wm?20%XhW9_jyZOMCb%{ zii*dITU~m_AA4TBQ>djBuL@gZ7pOGS8@GS@*N=+f+u@n@?Sv%>Y=;R^^~BGLE3Y3p ziyBrJ71n_)Qi~RT$o5?G8mC_FGf@}}Mqk)8hDMDM5T2a>lfZn?tWP2<=|+)&=f^hm ztq*2`HIJFr`2H!3@>lbo5g$)%!SI;zS0s;Rzcv^?A4i2$lWXX1&d!{8`yBimCBeriY^V>aef_gM^;Vk*gF{bLu-yt1xzcmU(_J zHR2ND&mV0XrejO`YHXtKE|=`J*%`81Bw~s{r8xv_qs#pp+n?zUjUKEh-O>$mzEFG| z*$>^Mf&8jW5oJvtp4*7PPZB_kguY$SrfKQ>GWgj9LZXcYL_X!fMErbZHUrKf#F5^X z&cm_JBTU{^`jw>`oG`RwYxLoVfkY5$y$2i8LyDCe^RE)2;WLoUG1!zJ_q9|&5)k?~ z_1W;&Q5zmncqvLu9FJ&C8#Iy>4v%(xG(Th#B{-Z&Q)lZF3Uvd0B?5Q4xKn-U#bM0C zfpvHKJwafyj#avLt#%k?ri22kQi~f0L z9N;_0!@!`2@u;l46C&Hwpj*l1silH~CS*rdaE@TSL4VxI_0o~FJ5;663JQ_;De6#i{eBA=>b zZ%BlAX7hAH+UfGg{C)5&^O1Pdx`USamr;(0Y-7^#=onq&riPvdr`_&Ky4!V}dW7NI 
zT3=Iz8P{v!I*+#PSJil@%_-x|I>6K{HB>`kXuiEDEHGv4_jBGL0r)|ElX<2)wIHUz z=m)8&v%ilMh$#~UcZJAisfLSrxt;beoobbmdky9i!Qr)(e-E8_D03x3%4?Y4hNe=$ z(k7v0rt?!!d}WSi^1y%E4{JkaJNR{9&ix}Y8e^~q(65R%czf8GH$DF8 z*13&by9`zVq?y0BY!PqG?04gXVUNK(%)dUML2Z77T(J62SMqD{l*9hMsm^Le-yUWE z)1dtQtIS-0vOcuOgBu5JWxUf{h4#O{@U4=*os#d%_Z=Y@m>n(eJz|4egg%Uejj+J+ zHws}ZG9!`5;S2Mn0hrjJXOmci(Igo?PFd3dMdDq44$>*TPnjIgcCivID=XSHfzv(3 zKDe;;G_7V*SB?w4g@N!G*Y>*8YIUFfbyu(ou9ny|^ zq)o!iD+Z$;j7|yVr%keGbFC zTk?aTZ*crG(!?~b*D{O((bEY3!4r=dRYrT1OC3eoE6IR_m>YY zAxKI);D_0_7Zqb%e zb~E(}DI54*9ayIw=-WPiJW3%h7N=Ces;b9GBBxFg3Si-wa+lAQpWPe$r&r-Aa>XH4 zOSQ#j<3qXj1+6xwz-Lhi?KDHGsA`hBFT{NpOe4PXHyybr@Ua0$IAViShy7z>O zH!Fi(4WX-t(4|YNT8K7T{x%9b9{rDPt4X?#`;x>=*9X2_L4>qgj&|*ZW8xf03>l32f4ID8a#L=NXnvu( zQkbp6Cb;8dyB!}r!`6G=`o+8oxmzXkQb;3`f+JIg6(=hcKgK5|2rAriie_&rOyd&ATvhKl(C}!4PLv^7D!| z=TA_*otO_(nc`3D1T1|fhMVSw_bgJhT5%yGu2w6bPVLLl=H`|m*&4SSEt687H{)W! znJg;)&0y=KB(u(%&|Mnen!r)^o3tzzz_ez z8!p3LxUJFa!iSlxw|}jsl05!x-aWP&Lq;6gA}2|JCl_)_$MmBP4N?f524-&83)npT z{u39sC`9LP*Euen)fxNWCgX;z*WFuL{?v8t>}@}n)h%DMD>tBzuh9+Y=|(9jWh#+) zGJ7cWo5=q&-!EIc>w@*f$CwI(f!`A&nX1{Xp9LyzKyT48xf_vsF&2}|F5>`{a}(iO z4bc1*T8Yc{b54nsB69HSg)hEvx$QmT+&QdfReSRTAnasgG>>P|c>%*jzMo$@kxJA$ zw>}UUv}bF!JU3=1wflOcvI(?J_EwQ;GHVzQ0SQ3A;v$*1NplAYlEt@#{r zU=rT|xn0`P$W>Ez;kY!Ub6-!x3cM2R2Jq z@0~u=r}ayGQA_8!QN~5)g}{7NSd`H0c3wZ(uE*k@x@QPr73#`h zPKY3nm~V*QXk=ty--Lhl04?_!9~E7Ec@EiBpP0!o&>{VlB+O5F^Eftk-k3L_;eEh; zt{gGUXt%g@)_oQ_{=StnvC2T)N#QjUPq9wS%I^-OEQO+ZewU^tj9ThvI1NKBpdD3| z8~iK|TokjrX0is?RI)gcc)@hCqQw(;KLP=-kb!Vdz=%RPi;EjV6?O^C~P{7kW@z;CfU zd_dxE3%|u{+8W>9beOcIo#5Pbhkn0+c+x(|r(?PjzYJLIr0 zkbA!^-mNqmn*hxQw!6^V&=Nh&fiK*I9$+W4jhHuJooQFRUV zOBUO>%|eI~t2*myG{?@w#h9pQ1mV?h!NE;mBeyz0qfi^}sQ;X-W8fs+XU;=NQ>(JaTsz+!>C86n}P7&n?Ba;RK>V@c@9KF zOobpJd(K~3nTJN;ehavMN7eQ~&d_VjVkP9(eCgwR30z!h@Kf-c$6tyu`L7~LLR8yJ z|FVa0o{$AFiu1MDXyv|ot?k!06~slw+uD|*&O5Rd3q)lfihizgj^70P8w(#SPhFbv zJtEUd!^Ma^82-sjE^t5sbIL9%ya!jA{gw^LuhS@QH%FJ7_EJooqDbV(GL#xT9N-hV 
zL;o9Wt0dpk&RKD=zb;d>v`*UY<7U#nL`_J&|E{o|zA-8qAw3-d|KSPOuLvC5NLeFC z_S1{d(w^_5%p4E2IUD;7k+ZpU{!o7h#vk7GJAkeo>@aAp3<0t7^Inj4O(|9v3Z`}B z1J;nTDtEgiUjY`V#i$RYl9V1uTokn14=6e zv-+Csv%}Muo#-%&F4bKqn!WK{I7^(95vY__Ca+3))pW5oPYu#9qMmjO#?02nlc*X= zi$Xd8Mpe)M{fanKmTgQ-azh~Xh!PDr(phjSiqL|_w`nrdCxM_R3D9e>`A1fRqR-e1 z#Kc6X?C*VP1Q=#hd9SFxW?_v>y9kIx#1^lMsEFwaj3`EpF4e*XILmnin!uS6kh_aq zJ%Zv;Rp&o4Gp6c1Xyr=$^}-c7qVCtcN{53B1$J3Da080nRq#y+O?HX_4S$ zr5O4VU+Hpcls*<6=HD?Rnx~+EQ7bdKN+L;e zm1;q5mDxPH?c@#Y9dau$dd9f`Z(ruU=W*5`CBJS?62)$Gufj*=K(C@9(8l zr`2XrZ^Nf#vHnb<@Fr(`4s*u#*me)UjJW0zPCbCz_BQKz>8HXkvQY82nBCiwcLWXH zw-|c6_VI1o(iJ^zWZev3X;LwxEAegMr4M($i?j-{FKn%sL1pRqydsJ(uF zwf%JZAKe@M)8NT(lP^h?X4Yy~xZ9J5HI-_|cn%RK922(o3&Q@fow^CkDJ)R`G-NC# z{9x=4uPuk7wqCpiRP8eI7`Iv1MIOt(D_PO{+t`4!#c~%E=SbE7_N6PkGChF#*#S{K z`f`_As&%@?Qtx}0_ZtuMD_g@_Pe;9s%2i_-`2QaxNQVm@rfqH*ztq^68aH<$=!DR! z^I4x-kbGQ^=nVWwbH0l4T_)G{=_At()z5A ze5W9`rR1n$`-JU+p4_B>{Lv)G0b)oO3Un^#Hxwkf(Kk0-`*s!nn$(G#7v!*)BS{-` z%5eeX|yfIzAb_}!j zQBAGnoA4`A&93v3&#e5gH8i?cYW9j~-6ge`h!_akX7J8P+hWmVZY`A^`M~5#QTF9( zFP48T&AKR8B1Na}i+tqg6>!=+SdRr4J#XelcMKB<@;gWyu-U_hiEUVT6A)njFntTH z5OCV-+ZC?wvu^J64sd`GMMr*2!x`Mwws%wMX272t+iSOch$s=KmU{3WJQ@kPc%>8} z0frnnXZFA2I_5vqAGfV=EncKCQ)$VVCrpsLNrHJW1ugFf(CW+_TLZ@YkTAi!cz_O?hdb~5yv;=XK}H9 zR>9u4`WX=Yc=ts9&AXl!SM@&L@=on9aF`q<6J;8IdO03;u!KMj^5b-^ zh`J z(j8#}d@tCj?@@!`Fe--8-2@)Z?;(K6!ux-}tWef}Nmh2PEtECw!q+*Ce`%TB%gFfp zX+co0QCYD-YnWA^?#mY`S^%rO_XUKLn5^m@XallnlQ`?)jUc1uFK^P+Xm0ED8g&>i z>6Z8)HiPm%&+$UM=~Y`#w2L+wFDtuH^}g7~hTNWY54lCbaE$PyJ>GKM*Ra~BxJ*z! 
zA}DCk|9%!6t^KGDl3*6cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G14/filtered.hsps.txt b/tests/test_data/outputs/extract/G14/filtered.hsps.txt new file mode 100755 index 0000000..79d1546 --- /dev/null +++ b/tests/test_data/outputs/extract/G14/filtered.hsps.txt @@ -0,0 +1,24 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 23981 0 101 11 112 102 0 100.0 100 100 plus 1.93e-51 189 locus_1 11 112 True True True False False False False True False False False +9 0 762 23981 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False True False False False +10 0 858 23981 0 857 897 1754 858 0 100.0 100 100 plus 0.0 1585 locus_11 897 1754 True True True False False False False True False False False +11 0 972 23981 0 971 1766 2737 972 0 100.0 100 100 plus 0.0 1796 locus_12 1766 2737 True True True False False False False True False False False +12 0 1098 23981 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +12 0 1098 23981 0 1097 22872 23969 1098 0 100.0 100 100 plus 0.0 2028 locus_13 
22872 23969 True True True False False False False True False False False +12 0 1098 23981 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +12 0 1098 23981 0 1097 22872 23969 1098 0 100.0 100 100 plus 0.0 2028 locus_13 22872 23969 True True True False False False False True False False False +13 0 1281 23981 0 1280 3858 5138 1281 0 100.0 100 100 plus 0.0 2366 locus_14 3858 5138 True True True False False False False True False False False +14 0 1434 23981 0 1433 5150 6583 1434 0 100.0 100 100 plus 0.0 2649 locus_15 5150 6583 True True True False False False False True False False False +15 0 1464 23981 0 1463 6595 8058 1464 0 100.0 100 100 plus 0.0 2704 locus_16 6595 8058 True True True False False False False True False False False +16 0 1836 23981 0 1835 8070 9905 1836 0 100.0 100 100 plus 0.0 3391 locus_17 8070 9905 True True True False False False False True False False False +17 0 1914 23981 0 1913 9917 11830 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9917 11830 True True True False False False False True False False False +18 0 2037 23981 0 2036 11842 13878 2037 0 100.0 100 100 plus 0.0 3762 locus_19 11842 13878 True True True False False False False True False False False +1 0 285 23981 0 284 13890 14174 285 0 100.0 100 100 plus 1.1e-152 527 locus_2 13890 14174 True True True False False False False True False False False +19 0 4935 23981 0 4934 14186 19120 4935 0 100.0 100 100 plus 0.0 9114 locus_20 14186 19120 True True True False False False False True False False False +2 0 327 23981 0 326 19132 19458 327 0 100.0 100 100 plus 5.73e-176 604 locus_3 19132 19458 True True True False False False False True False False False +3 0 417 23981 0 416 19470 19886 417 0 100.0 100 100 plus 0.0 771 locus_4 19470 19886 True True True False False False False True False False False +4 0 444 23981 0 443 19898 20341 444 0 100.0 100 100 plus 0.0 821 locus_5 19898 20341 True True True False 
False False False True False False False +5 0 543 23981 0 542 20353 20895 543 0 100.0 100 100 plus 0.0 1003 locus_6 20353 20895 True True True False False False False True False False False +6 0 606 23981 0 605 20907 21512 606 0 100.0 100 100 plus 0.0 1120 locus_7 20907 21512 True True True False False False False True False False False +7 0 642 23981 0 641 21524 22165 642 0 100.0 100 100 plus 0.0 1186 locus_8 21524 22165 True True True False False False False True False False False +8 0 684 23981 0 683 22177 22860 684 0 100.0 100 100 plus 0.0 1264 locus_9 22177 22860 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G14/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G14/processed.extracted.seqs.fasta new file mode 100755 index 0000000..8f2eb45 --- /dev/null +++ b/tests/test_data/outputs/extract/G14/processed.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_13:12:0:5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G14/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G14/raw.extracted.seqs.fasta new file mode 100755 index 0000000..8f2eb45 --- /dev/null +++ b/tests/test_data/outputs/extract/G14/raw.extracted.seqs.fasta @@ -0,0 +1,42 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_13:12:0:5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G14/seq_data.txt b/tests/test_data/outputs/extract/G14/seq_data.txt new file mode 100755 index 0000000..8581fc2 --- /dev/null +++ b/tests/test_data/outputs/extract/G14/seq_data.txt @@ -0,0 +1,22 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 100.0 100 189 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 897 1755 897 1754 100.0 100 1585 False True True False False False False False False 
gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 1766 2738 1766 2737 100.0 100 1796 False True True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_13 
12 1098 2749 3847 2749 3846 100.0 100 2028 False True True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_13 12 1098 22872 23970 22872 23969 100.0 100 2028 False True True False False False False False False 
atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +6 0 locus_14 13 1281 3858 5139 3858 5138 100.0 100 2366 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +7 0 locus_15 14 1434 5150 6584 5150 6583 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +8 0 locus_16 15 1464 6595 8059 6595 8058 100.0 100 2704 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +9 0 locus_17 16 1836 8070 9906 8070 9905 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +10 0 locus_18 17 1914 9917 11831 9917 11830 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +11 0 locus_19 18 2037 11842 13879 11842 13878 100.0 100 3762 
False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +12 0 locus_2 1 285 13890 14175 13890 14174 100.0 100 527 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +13 0 locus_20 19 4935 14186 19121 14186 19120 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggct
tgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcc
tggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +14 0 locus_3 2 327 19132 19459 19132 19458 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +15 0 locus_4 3 417 19470 19887 19470 19886 100.0 100 771 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +16 0 locus_5 4 444 19898 20342 19898 20341 100.0 100 821 False True True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +17 0 locus_6 5 543 20353 20896 20353 20895 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +18 0 locus_7 6 606 20907 21513 20907 21512 100.0 100 1120 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +19 0 locus_8 7 642 21524 22166 21524 22165 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +20 0 locus_9 8 684 22177 22861 22177 22860 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G2/blast/hsps.txt b/tests/test_data/outputs/extract/G2/blast/hsps.txt new file mode 100755 index 0000000..bca347a --- /dev/null +++ b/tests/test_data/outputs/extract/G2/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 12 113 102 0 100.000 100 100 plus 1.84e-51 189 +1 0 285 22872 1 285 13891 14175 285 0 100.000 100 100 plus 1.05e-152 527 +2 0 327 22872 1 327 19133 19459 327 0 100.000 100 100 plus 5.47e-176 604 +3 0 417 22872 1 417 19471 19887 417 0 100.000 100 
100 plus 0.0 771 +4 0 444 22872 1 444 19899 20342 444 0 100.000 100 100 plus 0.0 821 +5 0 543 22872 1 543 20354 20896 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 22872 1 606 20908 21513 606 0 100.000 100 100 plus 0.0 1120 +7 0 642 22872 1 642 21525 22166 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 22872 1 684 22178 22861 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 22872 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 22872 1 858 898 1755 858 0 100.000 100 100 plus 0.0 1585 +11 0 972 22872 1 972 1767 2738 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 22872 1 1098 2750 3847 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 22872 1 1281 3859 5139 1281 0 100.000 100 100 plus 0.0 2366 +14 0 1434 22872 1 1434 5151 6584 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 22872 1 1464 6596 8059 1464 0 100.000 100 100 plus 0.0 2704 +16 0 1836 22872 1 1836 8071 9906 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 22872 1 1914 9918 11831 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 22872 1 2037 11843 13879 2037 0 100.000 100 100 plus 0.0 3762 +19 0 4935 22872 1 4935 14187 19121 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta new file mode 100755 index 0000000..debbb9f --- /dev/null +++ b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaa
gcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaannnnnnnnnnnatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgc
tcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%q2TBXlwpjBgjp0B0cAOW#DSVN05MQZ GYy$w!8y>{~ literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..bccfcb0 --- /dev/null +++ b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G2/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:08:00", + "number-of-volumes": 1, + "bytes-total": 42908, + 
"bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..4c0cfcd0c2c3106e85b66139d90770aa4f13ebf3 GIT binary patch literal 5808 zcmWModpwhi<9^-73U%Z*&XK~TE5~*2Qfd>5b7XVUcDB9S3_E^xWUu6!QZ8%L+Pi5f zdPPI!5<7CKL&y0hrIJya`$j2A$1Q$6pU?CB@q9kdzt8gk3V-YO)FYgCnT!HQe8ZX| zY~@8y%ZK-iua%WQY9Nz1fp791YR>KFFzJau1JPtiE-sGV-4S=0IjF;DN&c8W$tZcp z7~DLYmgh-zOEL1iE;RLx@1Eiq9yg2!`|_4KeYNf4`YcE^+W2N(mO>X(ORDPLmd}@c zX8(EG;soA%)Pif*cm_4efg0v8&;@)suuiM^HgU9yVRrQVCdRhk1yz-9uyv^ zIO&==gsy#0tnsoz-K847191X0}p^w%(< zus1|oR&d;ogsl!X9NB&8*|ApFNrvmIv?W4w5(MG7m4@V9LI+fNg!He7zg3$<8m=wZ zPl2#HFHYfo(`&!$9+SLKxtBY;igBCU9a8ZK@tHnf{%`TZWW~LBZGMr}nX9}1SWqs! 
z`tq6AdEczc_yrsD5&S$b;XAzFHQ8xvh`%Qd@Z3?CRq*HbP|chrp8kx85QkG6Lfn)W z{-&mGY`p%%oDx41yVO_H8h9oshlSp_>2r&}6Z*qlAAkyQEc-4{ zF_)gk@0oVPK?hY1rW+Gwmf>Gw6Q9uIA2=uuf#C=zyyL`)o#RCsV$5tj@v==KAC_Vr z@4Bj?ojy`e$24!%2Lhk{WZ_%2=l_Vx)RpFO8K735R_t7AYnr2ijk^Io8!sxqxRX3} zK69T4n>nGJRq=J=AKZV}Ml@K1+5Sj4<1VAHBG%tE#^mJ@5yvZD|a2qd`Jv!j2xPQADfQ1OD{jblg;k|ppTF*9* z3LE+2$FhBI))adFd8#dcZfngtRn>!GH=qp(qa89~+a+4$`pxgF*FEGfY!IUpG%0F6 zJ8pU5g<$060ug)3f@E1il-Qd8dW2AcgHQA?nWfX-WC@BWF+p zn&QG5kWFgT!S~ypZCK&nQTUD(27@u@*N>o4BLsx65bPCN_E~gG<;5K+67cfqnt{!M zOt9)H%Ld;wfl>Wx**WOzg)JBuG5rt8V>_($Ma;%gA@%qQx`VqdC*C0kKPiy+Pl&w+ zo3?4VImoo5-(}hiUp4pn<+x#^(%;QlnzLC+tfg%A*s8A+0S1n(r}ObQ9>B*~eiHJ59IKx@t21sC+#*xo)ur(x7GMct74UfVjHlb>ao|8Ko= z83_;!)eq3I#obl5G4~gXciC1oC_22v*pa!DYfSI6Key3 zelvUw40@Ul$;;XxvI9L{U^cloM$~!lXGn82tUhkVF>$dmepbcFqmi&#t6$$O&%F}g$jy9h10%p=s~uC>wwe_2ZCt9DsNt+h zMR-=jWJ21hvZsRG@HFd*Wc{k6j^)=Ou9$3V*81cqUF(*Xz81H|{z|(0b)06T@o=@D zxzd8?J$Ic?+w`k?w9WRUX=V*zZjl= zCT{EL<_2L(g~9D1@@cB^d|qyw!)xbimGnWarBrx$CFOr%#~;hxh>+?U=C}Td6tJXT zv^mq|IjD8jfSH52BXzklLo<66FzJuAC3778J1*t^5fy_mS^?-+#cTYX9L&4UfOMPO zI-Y$7y8zP8-&Hz~w_)|T3&8NF;62t~AJL$$AW|V*{--_pEqKy#PxnMyrLudcs^|H^ z{5{L8T!6CLztfW!2W@1$*I$PAd^rEDoW6yU?b&fW z3tvssX&|-dxY8RL2%m9nmj|tS-SfZh3zxwa%k~==Xs>3c>8gihi^L)~t8{dYm4u5l zOIdlB!RSXr6C%Yavn<+76AIhe0;a8)*XTHe2>(&$?{SU$?>gLbH0t2*jaIyMYPzti z_$Og^-{=>lo@IJR$2b~9Pa=Xx&pesxjFxEEas&>D)nw7=!#5NIpk$&q^;AazEgfBJ zdI1xR8H?gh(eNZZz7P|FtI%NxAWNyde?$;iSU6tb6}0M$86whRV_&SD5h?Qbl=Uqj zNJnYEtu@QaZ3MK{(zU!pg4XrK$xgD)6tdH7_Cl#|c=*lTO zSO$cYHT?Egtn*g%U0;7br4Sd3Qz>C}#Zx4aTO$nxurRaC^-K8|kJ^CAWq5*oGfdrB zX=P{{mg`W^WNQw55r@!DF=Wb$dYQ)}jQSZ|D)&Q)dSr`N^Tqtz-&=FJfMgNmvWtTc zkLEHrMn4N}%9b+C>!!yJ8;wBFK?L0nR8L0asrOsS>)48kmGBwiDG!y)g zzvvGsu^^gPg&iU>9!$RCyN><9nfT(3K~}I62?nz@j;wQ=zpr@Vx@Wy1!UVTn$6bBc z8JtQ8UD=N=Sx{F)wDGdxXzXbGd(SydQ{|JdQg~E--lpQvjydQBS?`0=jXDGA-ORm` zA)i-35(#|&=G*P{vfaGbBv!fs@bwBJqE&NstIr>m!GXmKDhT3c0sZ7MT@i}^J5QVE)f|FNmjWaugEZe zjOydedYsCVd|oAB=~GNznmgXBNZDk~gG_ijO#(W#J4ct7TZ&|B-L2c4l=89x#{{Rc 
zsQ7n%P2(fcNO9WQdg!~S6ptm z4z%Mo#;l4SXR?R?T23W-{;y%j$TE|RIB~?z(m*dB$ab_8T-V%Y1I3BViZd~z441s%?s=*I;wCdQZK|}k~yVZfO2*$LZ=p* zy+SK?-E!7Bv0O|JemnQo4=%HNK%6;?)vjo1cm#x>sEgtAt=i6GSjdmF3&&H5I%ijV zgAVST9k-CGM-r4I zFhcH@wAS%7RP8t(4QbomS-S*pgZgTT9>>zkunArz(E5_D>>pp^Rb9+yMk+mPEBmHK2OC74esX*lI=UK9%y=n0M?;y4Az(! z@??HP^oOFNg1W~7Dtl?UF9fKV%a>-5^>-388Af`fy-A|{ly^^KV`okIy;?r|J!Z-f znu3KhT+UyL+_D3L@`p zyip<`g>BIU9`+=|JLeb~wx=Mh9B49UE%5bcS0%yk&xGmc@`Zcy%{al$L#;gyZ;nGc^{D!yu6GZNGlTFQcRlO(!yq*6V|D(p`IOS z@yb~1hgzN3oP~D=EQvliqA@9rcu4iOlLk0c z)OsOzl)cjIMBSes2@_jZj&|h@B=pguJANL3oYOFxG3gc{yz7>)4JTDOs#%r;(GYVH zNXVWEP?hJQ5xB<+zO%JrOAxpJEoQzPa&Nf!>4Ox$TyC^C_}$a5mofQoqDUfC^Xq`p z$8esA4KOa}>u}Jj-FI}hTwh-h7aebBSAsg{#8J)>Roy81+431dJse;vdbBukamw$A zTrUmB6nioPl36@pzZB+{UQqh9ty z-(Wix`GIcElB2_Qxw5fo+-^57llC=wOy=|VO}oh(L*hZw^Fi<*zG(G|(5ab}HF#tX zy$CJq{4v_XX@9eesc%0yn@1P)_q1XH;O)Ny=<2>!qo(o@5UV)n4e3^uVAnyxwAOsU z22xezZjt6I!2%7YW^XD<<%z^aLp%Io@`yz*{y{4wEbdZoYsXr2AuI{WE#BmrRUSI4 zyWSx?B7M=B4zua9b#q12H(rXSi8FEnmD0rGSE#O<&sXPZKn6wBQ|`f->FRhARV!&; zLZC3g?aPV0I^cHORnO&>wHuVNE z%t*DvgRgY}!(t-u4b{&gyl!DT0g;M1l2tJkF+YwGGd1Wk9bBM`f={3cT^ND6+sT!K zC@xif?h`9xqQ;X}rovw@Tv8yK{te4?IJi(~pM?Y0pxEsN-$c;l^}54Drlyex8Ckvz zdXc;t(!I7zTNuC&fan!Xm-f#M_T7c0nXT$fkpY1chP|J~jkPuz0P&7|wHB+X{-&CH z*rKhgY*7lwZnZCaA8spv{Nok)pxNykxgXIbI>M9g2xF5XS83ZF7dx)nXdE6p_XbH>Q&9Li42=Dqkk?gD zzftswROTV#PMqLwdnKbveVV)SoWhIlt_e?;cM_#{)e^ilmN)=%MFvyY=v@xwz2}6< zW4>jIW^L;2f^ZC?chj3up6n!y-$rH`=83&odrrZ5m?5tZ&5&M(POoVWO#|*hE53~o zje9qawKCN*QwSSn~y%FeR3k~CMP zanQXyn@_hJzk$6+ZUTl*yAMZJA_@q2GfF%;$;*QQ>PP-c)_Vmw)YZ&C-0dSj!vtE{bF8nGFm3(9F*p$5WVE;p_ z{^6BR!_5m<^mUO{3w*iR&8c-#zgm9!K->E$>kx;+raN*74eKme*Jt1})+nvj*B>Sj z@2CIKvDPyQp7=KYnpAFKqj80|C3!$wrE!$+7Ib`#H0+tqc*Q~3|YOFDm>8gVyl-VVh%k+p!`>8kcjPhfVsS9}M3 z>0vAHMY`5P*9X@RYmf8Go5Gt;MZb>D)nFS1{2$|>9uGQ9GprrGSXY-CH*@^paglY~ zi*AjBicx){3-B||3+6V!uU{x(ZwB#nSLDe`KcBmQagk^^&BdG?h=0IKWmn+J>>`!r>7L#QwM=>j( zG&G98iT*<>IW^vjsimJbhQ@TsEZz`p+GP$>F#|!{_T3w7o-Z2Dt)_CK9+_P!%Dz$Bmamkqp_)&T-RaSv$*3_X2W*xI>wfq|Bf(>Krx 
z0q0$Q?GXmPtCr610Y?~7w&pWyPvfpOzn@6A0RCLtQoZ40WU-8C>dAlfWH98yl@f#m z7<1vA>F>vNBw(s1Zd2h(Wqs}iH6e$<_3 zCa{`^Jgs&UDT3USOdM(nmD@qVsDFf1bO2KjEk*nWW6W|=+)L}|B@N{O7>)%+tNDTv zg27thyLM(0$Z#)#SiEy!H!WgV^otO}#p6U_;E`(c-PHtJtl;9vZv`5gG_|5z8hWfY z7Ja;o!}~6)OCqgW`&s%PBZGl~_pIBpd02zx-WN4)Em5xs&$ZAyTD_k~9$QnK!NvMp z2m9RaVL*2sQ z&x5S(1#=yD9FLm(Hs`T}85 zmFUBM@#gx}F5o*%mcGk(CN2S5NlO4~a0ys?-~+wHyt nxP7Pl@g;z8cL_M8wFHFJECFHPb4N;+fM3u{K%{U9Acp)uC9XVH literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/filtered.hsps.txt b/tests/test_data/outputs/extract/G2/filtered.hsps.txt new file mode 100755 index 0000000..3a41218 --- /dev/null +++ b/tests/test_data/outputs/extract/G2/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 11 112 102 0 100.0 100 100 plus 1.84e-51 189 locus_1 11 
112 True True True False False False False True False False False +9 0 762 22872 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False True False False False +10 0 858 22872 0 857 897 1754 858 0 100.0 100 100 plus 0.0 1585 locus_11 897 1754 True True True False False False False True False False False +11 0 972 22872 0 971 1766 2737 972 0 100.0 100 100 plus 0.0 1796 locus_12 1766 2737 True True True False False False False True False False False +12 0 1098 22872 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +13 0 1281 22872 0 1280 3858 5138 1281 0 100.0 100 100 plus 0.0 2366 locus_14 3858 5138 True True True False False False False True False False False +14 0 1434 22872 0 1433 5150 6583 1434 0 100.0 100 100 plus 0.0 2649 locus_15 5150 6583 True True True False False False False True False False False +15 0 1464 22872 0 1463 6595 8058 1464 0 100.0 100 100 plus 0.0 2704 locus_16 6595 8058 True True True False False False False True False False False +16 0 1836 22872 0 1835 8070 9905 1836 0 100.0 100 100 plus 0.0 3391 locus_17 8070 9905 True True True False False False False True False False False +17 0 1914 22872 0 1913 9917 11830 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9917 11830 True True True False False False False True False False False +18 0 2037 22872 0 2036 11842 13878 2037 0 100.0 100 100 plus 0.0 3762 locus_19 11842 13878 True True True False False False False True False False False +1 0 285 22872 0 284 13890 14174 285 0 100.0 100 100 plus 1.05e-152 527 locus_2 13890 14174 True True True False False False False True False False False +19 0 4935 22872 0 4934 14186 19120 4935 0 100.0 100 100 plus 0.0 9114 locus_20 14186 19120 True True True False False False False True False False False +2 0 327 22872 0 326 19132 19458 327 0 100.0 100 100 plus 5.47e-176 604 locus_3 19132 19458 True True True False False False False True 
False False False +3 0 417 22872 0 416 19470 19886 417 0 100.0 100 100 plus 0.0 771 locus_4 19470 19886 True True True False False False False True False False False +4 0 444 22872 0 443 19898 20341 444 0 100.0 100 100 plus 0.0 821 locus_5 19898 20341 True True True False False False False True False False False +5 0 543 22872 0 542 20353 20895 543 0 100.0 100 100 plus 0.0 1003 locus_6 20353 20895 True True True False False False False True False False False +6 0 606 22872 0 605 20907 21512 606 0 100.0 100 100 plus 0.0 1120 locus_7 20907 21512 True True True False False False False True False False False +7 0 642 22872 0 641 21524 22165 642 0 100.0 100 100 plus 0.0 1186 locus_8 21524 22165 True True True False False False False True False False False +8 0 684 22872 0 683 22177 22860 684 0 100.0 100 100 plus 0.0 1264 locus_9 22177 22860 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G2/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G2/processed.extracted.seqs.fasta new file mode 100755 index 0000000..85881cc --- /dev/null +++ b/tests/test_data/outputs/extract/G2/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G2/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G2/raw.extracted.seqs.fasta new file mode 100755 index 0000000..85881cc --- /dev/null +++ b/tests/test_data/outputs/extract/G2/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G2/seq_data.txt b/tests/test_data/outputs/extract/G2/seq_data.txt new file mode 100755 index 0000000..a2d95e9 --- /dev/null +++ b/tests/test_data/outputs/extract/G2/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 100.0 100 189 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 897 1755 897 1754 100.0 100 1585 False True True False False False False False False 
gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 1766 2738 1766 2737 100.0 100 1796 False True True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_13 
12 1098 2749 3847 2749 3846 100.0 100 2028 False True True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 3858 5139 3858 5138 100.0 100 2366 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 5150 6584 5150 6583 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 6595 8059 6595 8058 100.0 100 2704 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 8070 9906 8070 9905 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 9917 11831 9917 11830 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 11842 13879 11842 13878 100.0 100 3762 
False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 13890 14175 13890 14174 100.0 100 527 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 14186 19121 14186 19120 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggct
tgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcc
tggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 19132 19459 19132 19458 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 19470 19887 19470 19886 100.0 100 771 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 19898 20342 19898 20341 100.0 100 821 False True True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 20353 20896 20353 20895 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 20907 21513 20907 21512 100.0 100 1120 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 21524 22166 21524 22165 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 22177 22861 22177 22860 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G3/blast/hsps.txt b/tests/test_data/outputs/extract/G3/blast/hsps.txt new file mode 100755 index 0000000..81793b0 --- /dev/null +++ b/tests/test_data/outputs/extract/G3/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 22861 22760 102 0 100.000 100 100 minus 1.84e-51 189 +1 0 285 22872 1 285 8982 8698 285 17 94.035 100 100 minus 2.36e-124 433 +2 0 327 22872 1 327 3740 3414 327 0 100.000 100 100 minus 5.47e-176 604 +3 0 417 22872 1 417 3402 2986 417 11 97.362 100 
100 minus 0.0 710 +4 0 444 22872 1 444 2974 2531 444 15 96.622 100 100 minus 0.0 737 +5 0 543 22872 1 543 2519 1977 543 0 100.000 100 100 minus 0.0 1003 +6 0 606 22872 1 606 1965 1360 606 15 97.525 100 100 minus 0.0 1037 +7 0 642 22872 1 642 1348 707 642 0 100.000 100 100 minus 0.0 1186 +8 0 684 22872 1 684 695 12 684 0 100.000 100 100 minus 0.0 1264 +9 0 762 22872 1 762 22748 21987 762 0 100.000 100 100 minus 0.0 1408 +10 0 858 22872 1 858 21975 21118 858 19 97.786 100 100 minus 0.0 1480 +11 0 972 22872 1 972 21106 20135 972 0 100.000 100 100 minus 0.0 1796 +12 0 1098 22872 1 1098 20123 19026 1098 0 100.000 100 100 minus 0.0 2028 +13 0 1281 22872 1 1281 19014 17734 1281 11 99.141 100 100 minus 0.0 2305 +14 0 1434 22872 1 1434 17722 16289 1434 0 100.000 100 100 minus 0.0 2649 +15 0 1464 22872 1 1464 16277 14814 1464 15 98.975 100 100 minus 0.0 2621 +16 0 1836 22872 1 1836 14802 12967 1836 0 100.000 100 100 minus 0.0 3391 +17 0 1914 22872 1 1914 12955 11042 1914 0 100.000 100 100 minus 0.0 3535 +18 0 2037 22872 1 2037 11030 8994 2037 16 99.215 100 100 minus 0.0 3674 +19 0 4935 22872 1 4935 8686 3752 4935 0 100.000 100 100 minus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta new file mode 100755 index 0000000..c366629 --- /dev/null +++ b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatnnnnnnnnnnntcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatnnnnnnnnnnnctagccgggaacaaattcaccgtctaaaaagagttttccgttttccgcatacataacgagcgcattaagatagtccggatcaaacttcacgccgcgcttattttttgcaatatacgtcaaacaatgattatgggtaaaaatgactatatttttattctgcgactttttcagtaacgtattgattgaagcataaataccgctgccgcaatccatcatttttttatccgccgtaagcgacctgcctgcggaaaaccaggttgccgactggatggtgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctgacaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctccaggcatgttgtgaggccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacnnnnnnnnnnnttaaaccgcttgttcaaggctga
aactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatnnnnnnnnnnnttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgagttgctcttcatcttctttcgaacgcatgtattgtggatgttcctggaagaaggtaagcgcctgttctttggtttgtttatatttttcgcaaaaaatgcttgagctgattgcgctattttttgatgcggaattatcagcgttctggtttgataatgatttattcttcgcaaggtctgacggcacatacggaagagactgacactcatcaatactatttgcgttggccagttgctctttctgagcgccaggttgctgtaccggtttgctcacggaggaagggaggggcacctgggcacagccgatcagtaaaaagacaggaagacagctataaaattttttcatnnnnnnnnnnnttagtgcgcttttacctgcctgaaccagtaattttccattttcgttatccatttcccctttttatttttcggtattacgccagcccaaagtaattgcagctgtcggttataggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtccttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactgtacgttcagtttcacacatttttccccgcagccttctactggcggcaatattgtcatggtataaggcgggttatctgcagtggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggcggcagaaaatagcgcgagcaaataaaaaggtattagtttcagnnnnnnnnnnnttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatnnnnnnnnnnnttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgc
tctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattc
aatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgacc
ggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatnnnnnnnnnnnttagccctgatgcggcatcaattccgggtggccttgtaccggcggcttgttgctggtcagcgcggcttcatcagcctgaatgctgccggaattggccgcccatacgccttcatgcgtgtgggtgatctgctgatgctgcgcattcaaatcctcgcccattgccgcaatatgcgtgctttccgtaccgccgctgttagtcgcccaggggatgacgggatcgctggcaaatgccatgccggaaatcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatnnnnnnnnnnnttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggtaatttcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgggcaatagtttccgcgtcgccgtcgaactgccagttattgccggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcacgcataaagaacaggatctcgccgagcttgccggtctcctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgatctctgcgctgacgccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcag
aggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgttcacttttacgcctgcgttgcgcatggtatccagtttttgctgacaaacttccgtaccgccctgaatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtccagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacggccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgacggtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcaatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatnnnnnnnnnnnctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccag
atgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatnnnnnnnnnnnttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcg
accaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatnnnnnnnnnnnttattcctctttctgtgtgggatgctgtcggccaaaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtggggtatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgctgataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttagggtcgtgcggtatttgttataacgttcgcgctcgcttttgagcagttgattaaggttgcgcacaaggctggtcagctcacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgccagcgcctcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagcgttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaaccattccggttgaatgcttttaatcagccaggggatgttgcgctgcgtccataataatttg
cccgtttcatcgtaaatcagcgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgctgattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcaccagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatnnnnnnnnnnntcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacnnnnnnnnnnnctagatcacgtattcgatcaacgctggttcttgtttacagaggcgacgccagtcgacaatcggcattcgtacctgcagactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgc
ggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggatttcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagcagcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtttcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggtttcgagctgggtttcacccggaccgcgcaaaccaatcccgcctttctgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgttgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcggaagagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaannnnnnnnnnntcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccaggg
agaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatnnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnntcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccagacgcagcgtatgttcgctggactcgcgcatcagcaggccgtaaatcaatccgccaaccatggaatcgcctgcgccgacggtacttaccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccacagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgtgcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctttaagaccggcgactaacgcttcacggctactatcaaagataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgctgacgcagaccatatcgaactgacccagccagctcagggagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccggaaaagttgaagtcggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcactttggcaacgttaatgcctttgcccgccgcgtgcagacccgtggttttcaccaggttcacnnnnnnnnnnnttaccattgcgtg
ccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatnnnnnnnnnnnttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%q2TBXlwpjBgjp0B0cAOW#DSVN05MQZ GYy$w!iXO%Q literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..c1491a4 --- /dev/null +++ b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G3/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:08:00", + "number-of-volumes": 1, + "bytes-total": 42908, + 
"bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..37990ba5597508bc97d186bb788c80361ffb6799 GIT binary patch literal 5808 zcmWModpwhi;~uh9$U)0E)hSGI0 zi@&Tj6J~1j^lcWF8#{mXB=J>oRSQJ@tn4kHdannW?0l6!S#v`S;`FyZYd_=uH;vJ)VFx9A#rk-0_I_KeGmNMAO9o7npU*s712-$bm&3p~gkMcJ6 z%skX~%se@RK%1ue9Ub+10vpQNxonXBQQqXH$bw>!jXYeX|pT;MWnPmF!<|@6QR6LU33`qgZxr=Tid60fnEEQ zmH)8iC(O@E+jrZ?_vtFyfON(&&(2u+5vH4}w5;EsDx9*q;>|z46kd)tEjqll1!|;q z1ke<0zvS?{Bu|LI@ge{kx+{Op5@)~xW0g+85Ud`X?Ho3s9A8y#MD54Xon^i#%19A9 ziC)iMiT(k(erbc#aj|7I(JBA&7|~IR0~eC)KdFwF2Z|i4Cm$+50-Mlp4-h`Ne_p)X z*RWpGu4;O(9wRQP(80Y=4!y3(sz=htX1^|Ys=RbW*C!M|Y9{!bods{(@(!*n$v3_= z_wCBsIlj%=saLTKs>j~qz}9q3TeUubbkk;5dcw=P6Wza)jzOorrkz9Xo-EO9Ira5)a8Wzbn(&K%UyIW} zj3A}2=2dLz-)VIHufdHQ;}@9w+GO()0}s`e@60yp3(g99~-m6yf9=VTq_fxdS+SDY4 z3#14V_ySBa7nTf;qs;+d-F!)$`?2TQE~P@^#M1yxvFaS{d{woWIcIJ8woO(JP$<{! 
zNq#sY-9&-soMY(j1Wa4F=Zd1Q@YZNK_y=I`T#Lb^R%=>dia6kXrM2)!5No}kJ|n%S z>QAww0~3IYWmhfkv?Sb^t~k|k<1I90TY}7_!-g=g;s|^G{Kps90U>51HRTddq>r>I zk`hwuTE@VRD<%Pg9%R>gZYR0tI9K&Lm)(Z;{ zw9rC13y$E^mV{W&eSn{6{hp4xKa+ygg3 zS5TR}2@iIiRNOX17Hk}a@MJG6nhZeHlYQf9kvjMnJ+YBE&G?zirJVR70o<98r*y*n z7zJMg{lC1`RIPI7JA|Tss`-2R#0cWq9iIt8meKr#IhRoWi+}3F%wdgKUJ>nyr{wyX z3btGB!p{_fZ9RS!qt|66Om49RPbK;CP30fXo6b0%mWnZm_(0LB+g$0*Ja;n9NUK0gf0!Uvue5+*wCHAb5MnJ5*_h}SBR$ax-4|+$8u~*NPc=GQOh{-ME*rSY4fK-bLK;;9=e8qcRb?F8>ZrgOT?jdt6#~cQE9X zkr(-bJ78#8`yZXGkiolVpftdpB17O6DHRg&q|A?M4yLRse`r#iP8s>mhh7z$anDwGt4Heb8_eZn@<%(zxA7()@E<4`Ro-cPNEEQy@04gT`KH-EZ-$?kKJhfEdym7E!gAx2vAB|B0p9xOL(P7icy5cCnl8Uj)`_emb z?~m@P(Kii~72P-4IaN6|T!xxMW`4;_Pzq}F5^4!|v_JYd3?*@gw}T5)za>m5KPrfL zfj06E?}r1dwyiNm!F|T}ez)cxpgo$*aia`EIHO5c@s%5(>EMxVLEbI1(3J1sy=>1^ zWttT?$VfmhbZICABu{d2F~Lik z0+Yp=Q^}+SKD(i}#^aLUb}Vsea2| zx(y{Q>MXz-(8yTgL-vy5ZKzm*!$JlL2dXBv(UEr&ox%M;FW#c6{`uSFRIl1Pe%3}= z{htt5@Ma~5$qXFdpQbrX_b!>I)%x-;S5F~2XB@9>SCOq74%9xZ>{i&;gHCmBleH2J z%LP3k2tS4ATk`B?$~@E@`n#{D9PUWNPpJvFhCWTYzG-%tTO+({C0`2eo2$Z-IWES= zcUQ&f;&x|o`jpLvB3+k%56wAv;af3ZYp%H;ikbRf%GeRdM&0{+$;!>AHm(;k(ZmOq zk?AW`Nt_`=U$9yUs!cqA17vg~q#80x*&6kI2qnwtyCzJ=9=TWGS9Wtm<@pI)u4#K# z1e;#@=EJBv9nVgB5`&Qw^Zu;OLx>gq_WClGe{t?&4b@dQ1E?yF(-Y#|L*XHI)Cc3} zP2=}H=OCAIwL?-KUQ8&iQ{M&EOU23=6QQNHdMH#(qk5giaHWl6WS==4dNsiSGeW4*C5**Z z$$g-JiRR^FD&?*H$_V3;?a}d4Whbd*Csbl_ruaiII#fik@!qe6+>{q{B{BFj^N=g= zm3_tIb*%yEAQnu*emkxXEI)-DvOS|IT;)=BtB*==p1gKzCxXR-yb4l_ZTYP+&=()0 zyfUUb2s&>4tHkvU#~A_6yi_J+I)*&Ub*7m&O;ssCKThAGXgEv+hM`+9*_9iw#`x0< zQoo!wsMNZ#X)kv{x4zInmevjlI^V^jf1j*m-nP1oqXf`$)FPOs1lNl4ufQ1z7V-Ey zPGtvf0i)xRKOH>6Osfw3voDtYb6Q5)^})%lRHCb+gG*l2xU$djUfC8Q)WU%K4G?Ca zOADTD9HYx6mk8GJU;5dECz(;sFskpO24F$H&cK%R!oQ?Vah6%@m!umOew^Xr+ zkyWeH;5+Mgl~Y3qO%n|0vKpXcB^*R4V~4X1B+NCR`=Sd6Xj&(QC}V0NcdLZtu$fB+ z)G?fF8ds~PTvOx~P>oO2gGGX&5^|gy&GeCEhmAzcASdcFTTN_>4nW?<5Y!hpwLYIG zKY}_K9{g5^!}T>;i#-P)HFfLRlk|g6)D+2~MI_1+ivV zR%$F3j^Led5-Yfw;jIW{U1{#!!Wjzra+m;FiD2{Dy3YyRo6tMwxIFqV-DPw!=d?Pg 
z|EISjcKXPJAG#{n=PUoF7nIiNj<##V-ib6uv9H42eiu(38*tbhzSF}6Iv^G{NLREP z6f@Hhsk$f=o7Ue}oI2lZ(GDBYcYgQiw4Y+|>INJW!u5+QlbR_}b=qEOY@a)cyEOCO zw`4`+rOG)s+UnuOvA6$L3P(<8<;c3kr09TjTGhd~;R27v`Zov<079X!RLLKWbYvWM zUI$h%rk~VIeT*ck%ybAk`I3iX>q~F0tOt#HpW(iOUFwkM!(I|iIoD`?{@X|Tvgdiq zuoYU~`i1xKwSnfpjp)31f&(mC_Z{~fd@KznSn6|Kg%vJW^XEY#ro~1*UhcZuny4p0 zKWC!I*TtIlbV1^MPZhI#M~R12oOXXR;qy zuG-po3q5;1UwMk{UTWD-tH`W?GZu9VUOTWaxu(cb!m+7c+J071T%jC5p><<d0#YFC-?z%|zO^#SO5X64 z9GXKH4RY4)O15cRs*5so&IErvmnaYX*~*@fD=t-mD^sL^gb)}JFnx7AB2fEJW^zKg zo@Ue#T*Mz~lv+iCb~rV&LZ90kj|J{zaS%y&s3gt7&tY|GY#^C_Sv=@<&rz^^yDxJE zD;}f+I?1OEsDDC7jrsqhdz}~p)dZ> zMHofwropEXV8e@gPuZOU@hoz=7=-!BdzJ&gY6Z*LIeoU`>rAfML^x8v*hJ}~CLy>v z>v|y{VLcl0NKsIr$%ET>>uj3upg$A?0@(ix0*#lu@!BQij1oe!)N&8H(MNKI1=nm| zytOZ2`1C>V@m8MrkOs7n!DKF(ytUx<^wsX7oq^LidEGQ|#LWOgD};Rv|DA8)o{vsj zntyg9iX$-^Ii5%jNTp%MP7#l0E_qNlOrR(k1K874x)mB|6hd2}ik;<2+EkRylipBB zq)6T11sBxyUKqR}apEgCB>@Lw)|CCnH-0P}z24-Sp=UlXEsbe=U2JATjvXcutXN5< zxk;~CHKO(^4v6oncZ+K;#q>ZwBq$?@G!q}QM2p{Kf>?c_EB9CqoUzZf5M}p4Lt)f( zlYqR)I1BpE1RfKjrZ7Y)JKhAs+YKgtc%NDAQMGl$!n%G6CRW#goSDiv^5lUv_FHRm z3IsL?&`0(DoZ)sKvH0kv|F@+cMS*t=FP0l&Wp(n`cRr*L4)z&!VVE3AroV{1xsQ4A zwBNH$JK`0OSLo6jXXd)15SOl*_&-qEat>8lAK;^wApV)}A@HzqJ;B1cl&P4UuceneLeg7T3ek5py zIiGv3#Hz3Rf7WkIv!TN!1$nG3xEze72`RYe?)u7mCbv3yT|FEG4!vo#A*x`L>>s!9 z05@w!DgCPIco<6vg3%q+6WX^caaP|}{LAB`Pom7c44=fHc}5Db6A2cedNt=|P{9eb z#Fxdrloi0&>Ey0yKk+WX(ZmIi@{&^yK8$U^)bf#a-`2d*bOtn3zUO(QC9kf*KKhQR zU^5Vt<`sRIS;Uq;{Kxdr|F(yIqVJp>sj!k(>&;eK4Ltqr%l(D17W~Vore<{O(_w$e zS$%8$4YQHXEhf`xsbqlf4M{*O?k`o^HpH35ZSNa85mF&%a?PJhExe(7Z8#VA*Eblt zzDVg%7;vfI_kGNkdALJ>#P;>ODp%uey0(<)EIPnERhyr+d93IuhIf|MU;TZbrD5=u zG$sq%B$GVCF5c|taO=y+<1Evj%i|!Ja$EArOYmy1;@AGB4RP9E_SWuzTs4t^l2H&p zRPv6Lx}gr|1gcUU)$ep4%=WBAT*5>Iwx$7PRCqf}6t!MxqSGL+^< zm$IV!`Gibe#n}W)$`vK!no~(ltoeERr<1YT@R9y%R-|*MaDU{}UomH=Dy*-K=PcsM zVq=cJK>WPDMk8eDV4$aH;NuWOnNjHwchxBOIZ!*K_E#lr+{D&%AkbZx!wT(k{~%Y<-iFDb~QElK##13m{~A4?`0I3`2Rt?GzI_w literal 0 HcmV?d00001 diff --git 
a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G3/filtered.hsps.txt b/tests/test_data/outputs/extract/G3/filtered.hsps.txt new file mode 100755 index 0000000..7e6aeac --- /dev/null +++ b/tests/test_data/outputs/extract/G3/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 22860 22759 102 0 100.0 100 100 minus 1.84e-51 189 locus_1 22759 22860 True True True False False False True False False False False +9 0 762 22872 0 761 22747 21986 762 0 100.0 100 100 minus 0.0 1408 locus_10 21986 22747 True True True False False False True False False False False +10 0 858 22872 0 857 21974 21117 858 19 97.786 100 100 minus 0.0 1480 locus_11 21117 21974 True True True False False False True False False False False +11 0 972 22872 0 971 21105 20134 972 0 100.0 100 100 minus 0.0 1796 locus_12 20134 
21105 True True True False False False True False False False False +12 0 1098 22872 0 1097 20122 19025 1098 0 100.0 100 100 minus 0.0 2028 locus_13 19025 20122 True True True False False False True False False False False +13 0 1281 22872 0 1280 19013 17733 1281 11 99.141 100 100 minus 0.0 2305 locus_14 17733 19013 True True True False False False True False False False False +14 0 1434 22872 0 1433 17721 16288 1434 0 100.0 100 100 minus 0.0 2649 locus_15 16288 17721 True True True False False False True False False False False +15 0 1464 22872 0 1463 16276 14813 1464 15 98.975 100 100 minus 0.0 2621 locus_16 14813 16276 True True True False False False True False False False False +16 0 1836 22872 0 1835 14801 12966 1836 0 100.0 100 100 minus 0.0 3391 locus_17 12966 14801 True True True False False False True False False False False +17 0 1914 22872 0 1913 12954 11041 1914 0 100.0 100 100 minus 0.0 3535 locus_18 11041 12954 True True True False False False True False False False False +18 0 2037 22872 0 2036 11029 8993 2037 16 99.215 100 100 minus 0.0 3674 locus_19 8993 11029 True True True False False False True False False False False +1 0 285 22872 0 284 8981 8697 285 17 94.035 100 100 minus 2.3600000000000003e-124 433 locus_2 8697 8981 True True True False False False True False False False False +19 0 4935 22872 0 4934 8685 3751 4935 0 100.0 100 100 minus 0.0 9114 locus_20 3751 8685 True True True False False False True False False False False +2 0 327 22872 0 326 3739 3413 327 0 100.0 100 100 minus 5.47e-176 604 locus_3 3413 3739 True True True False False False True False False False False +3 0 417 22872 0 416 3401 2985 417 11 97.362 100 100 minus 0.0 710 locus_4 2985 3401 True True True False False False True False False False False +4 0 444 22872 0 443 2973 2530 444 15 96.622 100 100 minus 0.0 737 locus_5 2530 2973 True True True False False False True False False False False +5 0 543 22872 0 542 2518 1976 543 0 100.0 100 100 minus 0.0 1003 locus_6 1976 
2518 True True True False False False True False False False False +6 0 606 22872 0 605 1964 1359 606 15 97.525 100 100 minus 0.0 1037 locus_7 1359 1964 True True True False False False True False False False False +7 0 642 22872 0 641 1347 706 642 0 100.0 100 100 minus 0.0 1186 locus_8 706 1347 True True True False False False True False False False False +8 0 684 22872 0 683 694 11 684 0 100.0 100 100 minus 0.0 1264 locus_9 11 694 True True True False False False True False False False False diff --git a/tests/test_data/outputs/extract/G3/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G3/processed.extracted.seqs.fasta new file mode 100755 index 0000000..8130bd5 --- /dev/null +++ b/tests/test_data/outputs/extract/G3/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G3/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G3/raw.extracted.seqs.fasta new file mode 100755 index 0000000..8130bd5 --- /dev/null +++ b/tests/test_data/outputs/extract/G3/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G3/seq_data.txt b/tests/test_data/outputs/extract/G3/seq_data.txt new file mode 100755 index 0000000..b2135ff --- /dev/null +++ b/tests/test_data/outputs/extract/G3/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 22759 22861 22860 22759 100.0 100 189 True False True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 21986 22748 22747 21986 100.0 100 1408 True False True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 21117 21975 21974 21117 97.786 100 1480 True False True False False False False False False 
gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 20134 21106 21105 20134 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 
locus_13 12 1098 19025 20123 20122 19025 100.0 100 2028 True False True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 17733 19014 19013 17733 99.141 100 2305 True False True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 16288 17722 17721 16288 100.0 100 2649 True False True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 14813 16277 16276 14813 98.975 100 2621 True False True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 12966 14802 14801 12966 100.0 100 3391 True False True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 11041 12955 12954 11041 100.0 100 3535 True False True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 8993 11030 11029 8993 99.215 100 3674 
True False True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaatta
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 8697 8982 8981 8697 94.035 100 433 True False True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 3751 8686 8685 3751 100.0 100 9114 True False True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggca
tgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctg
cccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 3413 3740 3739 3413 100.0 100 604 True False True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 2985 3402 3401 2985 97.362 100 710 True False True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 2530 2974 2973 2530 96.622 100 737 True False True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 1976 2519 2518 1976 100.0 100 1003 True False True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 1359 1965 1964 1359 97.525 100 1037 True False True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 706 1348 1347 706 100.0 100 1186 True False True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 11 695 694 11 100.0 100 1264 True False True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G4/blast/hsps.txt b/tests/test_data/outputs/extract/G4/blast/hsps.txt new file mode 100755 index 0000000..e3d9c31 --- /dev/null +++ b/tests/test_data/outputs/extract/G4/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 22861 22760 102 0 100.000 100 100 minus 1.84e-51 189 +1 0 285 22872 1 285 8982 8698 285 0 100.000 100 100 minus 1.05e-152 527 +2 0 327 22872 1 327 3740 3414 327 0 100.000 100 100 minus 5.47e-176 604 +3 0 417 22872 1 417 3402 2986 417 0 100.000 100 100 minus 
0.0 771 +4 0 444 22872 1 444 2974 2531 444 0 100.000 100 100 minus 0.0 821 +5 0 543 22872 1 543 2519 1977 543 0 100.000 100 100 minus 0.0 1003 +6 0 606 22872 1 606 1965 1360 606 0 100.000 100 100 minus 0.0 1120 +7 0 642 22872 1 642 1348 707 642 0 100.000 100 100 minus 0.0 1186 +8 0 684 22872 1 684 695 12 684 0 100.000 100 100 minus 0.0 1264 +9 0 762 22872 1 762 22748 21987 762 0 100.000 100 100 minus 0.0 1408 +10 0 858 22872 1 858 21975 21118 858 0 100.000 100 100 minus 0.0 1585 +11 0 972 22872 1 972 21106 20135 972 0 100.000 100 100 minus 0.0 1796 +12 0 1098 22872 1 1098 20123 19026 1098 0 100.000 100 100 minus 0.0 2028 +13 0 1281 22872 1 1281 19014 17734 1281 0 100.000 100 100 minus 0.0 2366 +14 0 1434 22872 1 1434 17722 16289 1434 0 100.000 100 100 minus 0.0 2649 +15 0 1464 22872 1 1464 16277 14814 1464 0 100.000 100 100 minus 0.0 2704 +16 0 1836 22872 1 1836 14802 12967 1836 0 100.000 100 100 minus 0.0 3391 +17 0 1914 22872 1 1914 12955 11042 1914 0 100.000 100 100 minus 0.0 3535 +18 0 2037 22872 1 2037 11030 8994 2037 0 100.000 100 100 minus 0.0 3762 +19 0 4935 22872 1 4935 8686 3752 4935 0 100.000 100 100 minus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta new file mode 100755 index 0000000..d711669 --- /dev/null +++ b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatnnnnnnnnnnntcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatnnnnnnnnnnnctagccgggaacaaattcaccgtctaaaaggagttttccgttttccgcatacataacgagcgcattaagatactccggatcaaacttcacgccgcgcttattttttacaatatacgtcaaacaatgattatgggttaaaatgactatatttttaatctgcgaatttttcagtaacgtattgattgaagaataaataccgctgccgcaatccatcatttttttaaccgccctacgcgacctgcctgccgaaaaccaggttgccgactggatggcgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctggcaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctacaggcatgttgtgagtccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacnnnnnnnnnnnttaaaccgcttgttcaaggctga
aactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatnnnnnnnnnnnttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgtgttgctcttcaacttctttcgaacgcatgtattgtggaagttcctggaagaaggtaagcgcctgttctttggtttgcttatatttttcgcaaaaaatgcgtgagctgattgcgctattttttgatgcggtattatcagcgatctggtttgataatgatttattcttcgcaaggtctgaaggcacatacggaagtgactgacactcatcaataccatttgcgttggccagttgctctttctgagcgctaggttgctgtaccggtttgctcacggaggaacggagaggcacctgggcacagccgatcagtaaaaagacaggatgacagctatcaaattttttcatnnnnnnnnnnnttagtgcgcttttacccgcctgaaccagtaattttccatcttcgttatccatttccactttttatttttcggtattacgccagccctaagtaattgcagctgtcggttgtaggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtcgttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactctacgttcagtttcacacatttttccccgcaggcttctactggcggcaatattgtcacggtataaggcgggttatctgcaggggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggaggcagaaaatagcgcgagcaaataaaaaggtattagtttcagnnnnnnnnnnnttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatnnnnnnnnnnnttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgc
tctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattc
aatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgacc
ggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatnnnnnnnnnnnttagccctgatgcggcaacaattccggttggacttgtaccggtggcttgttgctggtcagcgcggcttcatcagcctgaatggtgccggaatttgcggcacatacgccttcatgcgtgtggatgatgtgctgatgctgcgcattcacatcctcgcccattgccgcgatatgcgtgctttccgtaccgccactgttagtcgcccaggggatcacggtatcgctggcaaatcccatgccggaagtcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatnnnnnnnnnnnttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggaaatctcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgtgcaatagtttccgcgtcgccgtcgaactgccagtcattgctggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcccgcataaagaacaggatctcgccgagcttgccggtcttctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgagctctgcgctgaccccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcag
aggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgtacacttttacgcctgcgttgcgcatgttatccagtttttgctgacaaacttccgtaccgccctgtatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtcaagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacgaccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgactgtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcgatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatnnnnnnnnnnnctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccag
atgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatnnnnnnnnnnnttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcg
accaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatnnnnnnnnnnnttattcctctttctgtgtgggatgctgtcggccagaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtgggctatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgcagataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttaggctcgtgcggtatttgttataacgttcgtgctcgcttttgagcagttgattaaggttgcgcacaaggctgatcagcttacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgacagcgcatcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagccttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaacccttccggttgagtgcttttaatcagccaggggatgttgcgctgcgtccataataatttg
cccgtttcatcgtaaatcaccgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgcagattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcacgagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatnnnnnnnnnnntcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacnnnnnnnnnnnctagatcacgtattcgatcaacgctggttcttgtttacagaagcgacgccagtcgacaatcggcattcgtacctgcggactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgc
ggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggaattcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagctgcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtctcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggcttcgagctgggtttcacccggaccgcgcgaaccaatcccgcctttccgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgctgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcgtaatagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaannnnnnnnnnntcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccaggg
agaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatnnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnntcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccaggcgcagcgtatgttcggtggactcgcgcatcagcaggccgtaaatccatccgccaaccatggaaccgcctgcgccgacggtattttccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccgcagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgggcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctataagaccggcgactaactctacacggctagtatcaacgataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgcttacgcagaccatatcgaactgacccagccagctcaggaagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccgggaaagttgaagttggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcacattggcaacgttaatgcctttgcccgccgcgtgcagacccggggttttcaccaggttcacnnnnnnnnnnnttaccattgcgtg
ccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcatnnnnnnnnnnnttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%so>}flwpjBgjp0B0cAOW#DSVN05MQZ GYy$w#B_7BC literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..5f1ea63 --- /dev/null +++ b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G4/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:09:00", + "number-of-volumes": 1, + "bytes-total": 42908, + 
"bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..f730863e3bf63c085cf1777e955c0744e01b99fb GIT binary patch literal 5808 zcmWModpwhi;~uh9$U)2arc;&WKKM;v(C`SHQZxX7+2aVT_NZ zSN6ez1nLaM|HzpCW7t^E%4dPJf8;IhiUN3#ngcG{;NVzUtAP=!n36^)^p?Za z3|MOyvnZNywJ}X`2(r+F0Lu{mUOC{t>Oz1|L{OZ|@5I`qa^-RUMCd21yF5U2Vaw7dliWf@BgoEwR(+V2@#Vd)lto{^ltN^MjTQE$%#7 zWbxtU$xOI@lU(1v7R`~3U!navY>ss-^$y!*u7Qn%6_epV&lqg zD^E`YCub_hH|&_U3p&fCP@;5#)G_a85lp{z?sm`a)fb~ja)c6~a@I}Z-;U{ycr-V= z)8Wu48t-3OGh5YVaW-K_2;oRTxun&N_dHrs5fu9Mm*e5wZhdqF^u7FIZ%4BPwdrKbO4$3qh8(d^1}>wRe8l=07Woud)bF~Y$>u5ZC-L{a~sr5 z?FytSSpKPzcZgmPf$dEIv@|!~stwMF4aO^-0U4|w|Ij^RL_W5nT#wp^qq#`^P~_1P zbPBDJwH*6Ba_!c}c$h z&3eC{oQa7oF3!gk8c;p{2KRsMnV61B%N>^6OEXIC3U{L}S6{9e@LF*-Xgl;s_?eV$ z-lz@BYL%`W(R!(xd%zPLHZn7l{%ttk`#bR{bjo|iCG5_LGVP|5U+JMGok)AqF98E> z&VSQ`m44cn@wI0sJXZgwlzf6{kO8~RqpbA ze;qNZP0)5@X`lfSg2d0IM&(6IqfeKkRv6BDz^U+?4t9cQZ^*HK=RjA>r_Pp<;0q&f z&JP{jEW3yeC7w1qv-sYl zx|B%11VMtHgDIARvf&A|CE%xDC{FM=`YhMATo5w(Bv4zbI!irQQ!8T3+1b45kX8a@ z@-+vdKQ4r3rogk$()G6i=53s_C9#*etJFOFeXxJ7&1g!eJtHVh6!@;%PVgg$wcAIV zmE2VgrtP8ulYpyDPd)CGIMS4+IN5dm4K%&844KV@jbU!pVb=V)f1Y0h1eo=dw2NG! zA=05pNlI^M8wa~Cn+5XwknL+Z-K4%_9M!9QTJw+{uKVUV#Lo!XgrD!?PWw&`(e|X< zm0z)>h8N43a1@WaB*3!o0lZ|pcQn+!*^K7CimqG$FS+%Kxi6k6AD$+Enms4FQPD^F zDH1l`VHCpONw?c|UKZgs9Nt@^&shjyxHbKiXv5i0gb|K*n2INNV{q2K?`WRQrqC^? 
z$cztPr8SW~1vAP@?m--6^N!ZAC|tw*dZIB^z|CI;2{Rc_r|r*2B0$M>#d5TU$-a4` zl;g!_EC}GJJ4=yFk8M*1gG#Yf|`6Q?hgvlC@}xI3vp z>5Ta?2EGage0ig-S>eog350_b%XhTNQN*h^F&l(zVtGk(uHn4r|29Tg!dj8MD%J~6 z&i6OxTia~M&lZCneg0Kr*Q8|(PN^7AA^P#mhTmI!A>Yad&9 zkh_{`JH6$MzhR4qWD8Q@IosNs|K>`jEKe)4)JJM-N(Yt1`{fJAQyed;q_xP!;`5`v zo>A60US~X{=9T!HDv2@3YkM+X0 z#FreBC$%Ft2!3|;B6@+8=cGh8@1ZZ&*5+IGglxV4pyBfosSw>zcpIn1$or7puI)Rz z=yLMt^FsdZ(?<=gf6lbKjR2QFZHch~?W*`!N0_M(%FcgAf(;Q!*k>ng_nV`@@-avO zrc7tMD09$mzAvvJaP&%0HeYu?81-YFr6M69s;^>Kr%w~@(2tHyb3nTCGi2smR^ATO zcH=L&1GXJ$a6gt)Y%qYvH0uP)!d7gH#l_*RS8x3h1R-*vH*;?a^)O(|7s(fUrwc?UQN_22g_*ZSuP=RBH7! zkbSr!+DAarUKk)gG!OVykD{zTv%?dWVhg5lAhA87x}MUFdq-7Mgfj@BZOY8ASM#{H z6=!QjS;GIu5_wwu*FIJx2-!}C1`z|2jaQs@$u5*l@_aUuHCyi`)y@lAklJA&-F>=h zm1c&`BK%%mG*iT24Z6z1rn!HmW=CU)^Lxs6657!-uy7`?P(4ru~NR z=6aM+5tzi?T3~G{wQi zg)V7}%(5>9C)*Dd@3DyqARWz>d;er!ASOkN#^r&%sjoRFae?*AgR}X6H55GAe(625 zG%n5jBCC)J-v2pQvA7jnGz*|k>s}w`IVXUxkmJtDN0uRln$;@pP*3PA-h(m;v8mXU z#!Y)@_T-G1GXQf)D`kofI*3cRpyK(C3t2=QsG8hDL*9z@hW7#ec&nPmXKzx|z3Urz zIqL@+|AaXF*UP~SM$p8*4DF#9pR##sy&vyV?KGly+Ucsbie%Tczy3jWuVQB-=-l8w zRWH^uUC~4Q$dh=U4R_aUg{PWLd;8U#%^7X_DLwP1?9-In>sH6PRl?hL(#6n`xf(2q z?P_XzXGN4L>U0rhPTQ|5(RclLaL&;i-;ViOch%!y-1HA=!JqX7NJ!P7gQ1ieP@2D3M{(270%}SV3N}u%sZ=>@Ca}>}%fdxeiuV~TCucWje=dHi@Yrl0 z!OjDV!|@&kU6^{!2wEZ9tO5uxejM1v8+HBNqm@eJIL&W2znA<0*=q@hUr92;j1ubf z3FGlKa$hKLvQ=|brM$UM8D%0e>w`?!r$br;6wGgH%!PSz&%Wzhb zRU-bjbH)Cfz}STNPe;!P^P2tt?u}>toRO7rZFp)kCB)6i(X}9ELOI}cw_=k3YNNx0 zMhGLwwGGcQjnn6l%J^$|FZ}HzQ!L1*>9u!ILomNkZ)j6y@n4b_IoRHJP{y>(5TLJ2 zrbd%POw99>H!(;V zH&yY7iEX>{@LRjL)zdPBstpD7nN85qGB%==vLabVV#ccPJ>mKNRGkw7lqsc{vsp}Z z+{hsT>Ns{Tm7`NvsV(siti>lAz(T&PjFjL`HGe4HYA+ViNy++*cC(!&`yn4w2pUM3 zUR%hOA4VOBj8yOf`M%z%W+633MxKPU5h7p(5^xP=tRlXVKtL8K!kME}d~5#6Bxm7@ zBBXA6PI^2Rj^dtc5h*y?k?ja%S8eIj#+HSBIYfYLg|Ov^hR;cy8_-*qgaX9Yc5#BzMW+J$%U)uGnEO=#Rif+H+k^Bw0bd^7_l*cfu$1XZqA3g^KPOq;!WqS9@p zJ=uVde#StNu8Fjrnf##9|0*BVtuCNdGt^bv_CpQ|znzo7XMT#T_?j2|a58Iy{Kg_| 
z8b9&}f1MQM#SwR}H~yb#r-lYtIbQ;#{`+^7FIVv!c%HE#(HHx3*vkYx9S0=QWvDk;FDQ~F`&dK_f&&Itc)d55 z$C#<>o{+-xXu@Ikn(e9f9ZL-{#xB|5k7twRK|k9%5b{OkDsXw47?>0WBYcLRo@W$l z--&E?ILFI^5{8TVBZFM8NYag@WLFt-`V+Ch?Ho2D?hO}bIQl!TER7GP(k_XHz3)2l zHMa(`m$9N@8labY%7{{o&Q>&OXqFDfR-beNql|z%3A&|;E<7w5(PFrZ}gJhE)sn}E|r2XFLk#j=&MesoRv4QQ*@2Nv6zfR8W&s0 zJ(Ls#H)q!<;33S1qn>FBGBkC-dWYVI`7YW65x|E7z97&Ja^14m9B@R?}miVkW)D=AFkkVu=0Ud>$TE!h?{lULA76-C_$B(y`=NATZ; zRvv}ujHUUfIWcUp$>_0UN47ML;v~V^I~V7azhHRKWuEG!M?&>sE}~ z^xF7^Jy$)$6QZ z^-bfP`L2}=(GYFdrfuA?y@w>OHF0G%vYSRH>I?_=W}ND#l44x<#)RlJr9I25*eGdV z0DbDRpfwI3BYDOIh4^|*b!C|m?j9}!TE!r!V&1aBwoCa93~8e>K8q^CLvu|OU^f!VNA+vZ zNui?SXt5uYb1^57r`OF{)qU(!hNFs#Amv5pJbVPph@s;v?Y*Uat?dqMs(#1y%t&2X zg?$a3P@xteCc``S5Tk@8dGN3K!5^%{Khd^LjaJ!8Y7IWr*bY7U?aRG|@izR6n3h&_ z`;(CX$VGi~?RAUM?oDPh8R;Z|=L3mB1oxNf@fO5|$?25Jj)zr|7#z#~MJpfZZU@fQ zuSNE$b3>zc|)!h1|5! zfRbL6C@XtQOkdZ4a|TuEPU^S%_doP4M_t531?P`UKN%nL)A+O!vdcKBfsuDs%o_rF zF44`^k9=NY=emN^_02-zOr$}NNI>)RuO^od6Ctiqovf^8%*!Q`$9CQA+brQ%XEAOK zF3U39X%c4aAdir(uQ-!rL%yt}Uv)04i?=*S`*b2+7d|{#%ZzsE7VL|D@+;=dbd}xJ ziM&NTNn|RidOqkP2R&|(TAdTnCVdHT{;~DP(BTF7zDczEVVS0J_^>|R#tjxYgGJoC zs{gPYMuUaycF*Q}E+t#AxNHcw`htu++?0aDTdzT_d!<2b5NJ?FK^l~Cg$A|lqXuPz s(V!f@aUWlUA`EFzUS%4TzqcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G4/filtered.hsps.txt b/tests/test_data/outputs/extract/G4/filtered.hsps.txt new file mode 100755 index 0000000..8d800ae --- 
/dev/null +++ b/tests/test_data/outputs/extract/G4/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 22860 22759 102 0 100.0 100 100 minus 1.84e-51 189 locus_1 22759 22860 True True True False False False True False False False False +9 0 762 22872 0 761 22747 21986 762 0 100.0 100 100 minus 0.0 1408 locus_10 21986 22747 True True True False False False True False False False False +10 0 858 22872 0 857 21974 21117 858 0 100.0 100 100 minus 0.0 1585 locus_11 21117 21974 True True True False False False True False False False False +11 0 972 22872 0 971 21105 20134 972 0 100.0 100 100 minus 0.0 1796 locus_12 20134 21105 True True True False False False True False False False False +12 0 1098 22872 0 1097 20122 19025 1098 0 100.0 100 100 minus 0.0 2028 locus_13 19025 20122 True True True False False False True False False False False +13 0 1281 22872 0 1280 19013 17733 1281 0 100.0 100 100 minus 0.0 2366 locus_14 17733 19013 True True True False False False True False False False False +14 0 1434 22872 0 1433 17721 16288 1434 0 100.0 100 100 minus 0.0 2649 locus_15 16288 17721 True True True False False False True False False False False +15 0 1464 22872 0 1463 16276 14813 1464 0 100.0 100 100 minus 0.0 2704 locus_16 14813 16276 True True True False False False True False False False False +16 0 1836 22872 0 1835 14801 12966 1836 0 100.0 100 100 minus 0.0 3391 locus_17 12966 14801 True True True False False False True False False False False +17 0 1914 22872 0 1913 12954 11041 1914 0 100.0 100 100 minus 0.0 3535 locus_18 11041 12954 True True True False False False True False False False False +18 0 2037 22872 0 2036 11029 8993 2037 0 100.0 100 100 minus 0.0 3762 
locus_19 8993 11029 True True True False False False True False False False False +1 0 285 22872 0 284 8981 8697 285 0 100.0 100 100 minus 1.05e-152 527 locus_2 8697 8981 True True True False False False True False False False False +19 0 4935 22872 0 4934 8685 3751 4935 0 100.0 100 100 minus 0.0 9114 locus_20 3751 8685 True True True False False False True False False False False +2 0 327 22872 0 326 3739 3413 327 0 100.0 100 100 minus 5.47e-176 604 locus_3 3413 3739 True True True False False False True False False False False +3 0 417 22872 0 416 3401 2985 417 0 100.0 100 100 minus 0.0 771 locus_4 2985 3401 True True True False False False True False False False False +4 0 444 22872 0 443 2973 2530 444 0 100.0 100 100 minus 0.0 821 locus_5 2530 2973 True True True False False False True False False False False +5 0 543 22872 0 542 2518 1976 543 0 100.0 100 100 minus 0.0 1003 locus_6 1976 2518 True True True False False False True False False False False +6 0 606 22872 0 605 1964 1359 606 0 100.0 100 100 minus 0.0 1120 locus_7 1359 1964 True True True False False False True False False False False +7 0 642 22872 0 641 1347 706 642 0 100.0 100 100 minus 0.0 1186 locus_8 706 1347 True True True False False False True False False False False +8 0 684 22872 0 683 694 11 684 0 100.0 100 100 minus 0.0 1264 locus_9 11 694 True True True False False False True False False False False diff --git a/tests/test_data/outputs/extract/G4/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G4/processed.extracted.seqs.fasta new file mode 100755 index 0000000..85881cc --- /dev/null +++ b/tests/test_data/outputs/extract/G4/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G4/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G4/raw.extracted.seqs.fasta new file mode 100755 index 0000000..85881cc --- /dev/null +++ b/tests/test_data/outputs/extract/G4/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G4/seq_data.txt b/tests/test_data/outputs/extract/G4/seq_data.txt new file mode 100755 index 0000000..755d0d3 --- /dev/null +++ b/tests/test_data/outputs/extract/G4/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 22759 22861 22860 22759 100.0 100 189 True False True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 21986 22748 22747 21986 100.0 100 1408 True False True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 21117 21975 21974 21117 100.0 100 1585 True False True False False False False False False 
gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 20134 21106 21105 20134 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 
locus_13 12 1098 19025 20123 20122 19025 100.0 100 2028 True False True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 17733 19014 19013 17733 100.0 100 2366 True False True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 16288 17722 17721 16288 100.0 100 2649 True False True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 14813 16277 16276 14813 100.0 100 2704 True False True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 12966 14802 14801 12966 100.0 100 3391 True False True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 11041 12955 12954 11041 100.0 100 3535 True False True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 8993 11030 11029 8993 100.0 100 3762 
True False True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 8697 8982 8981 8697 100.0 100 527 True False True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 3751 8686 8685 3751 100.0 100 9114 True False True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcat
gtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgc
ccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 3413 3740 3739 3413 100.0 100 604 True False True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 2985 3402 3401 2985 100.0 100 771 True False True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 2530 2974 2973 2530 100.0 100 821 True False True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 1976 2519 2518 1976 100.0 100 1003 True False True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 1359 1965 1964 1359 100.0 100 1120 True False True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 706 1348 1347 706 100.0 100 1186 True False True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 11 695 694 11 100.0 100 1264 True False True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G5/blast/hsps.txt b/tests/test_data/outputs/extract/G5/blast/hsps.txt new file mode 100755 index 0000000..18eb24f --- /dev/null +++ b/tests/test_data/outputs/extract/G5/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 12 113 103 1 97.087 100 100 plus 1.85e-46 172 +1 0 285 22872 1 285 13891 14175 285 17 94.035 100 100 plus 2.36e-124 433 +2 0 327 22872 1 327 19133 19459 327 0 100.000 100 100 plus 5.47e-176 604 +3 0 417 22872 1 417 19471 19887 417 11 97.362 100 100 plus 0.0 
710 +4 0 444 22872 1 444 19899 20342 444 15 96.622 100 100 plus 0.0 737 +5 0 543 22872 1 543 20354 20896 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 22872 1 606 20908 21513 606 15 97.525 100 100 plus 0.0 1037 +7 0 642 22872 1 642 21525 22166 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 22872 1 684 22178 22861 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 22872 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 22872 1 858 898 1755 858 19 97.786 100 100 plus 0.0 1480 +11 0 972 22872 1 972 1767 2738 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 22872 1 1098 2750 3847 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 22872 1 1281 3859 5139 1281 11 99.141 100 100 plus 0.0 2305 +14 0 1434 22872 1 1434 5151 6584 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 22872 1 1464 6596 8059 1464 15 98.975 100 100 plus 0.0 2621 +16 0 1836 22872 1 1836 8071 9906 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 22872 1 1914 9918 11831 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 22872 1 2037 11843 13879 2037 16 99.215 100 100 plus 0.0 3674 +19 0 4935 22872 1 4935 14187 19121 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta new file mode 100755 index 0000000..d8c0dc1 --- /dev/null +++ b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaa
gcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaannnnnnnnnnnatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgc
tcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%so>}flwpjBgjp0B0cAOW#DSVN05MQZ GYy$w#lpe_d literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..45b0214 --- /dev/null +++ b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G5/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:09:00", + "number-of-volumes": 1, + "bytes-total": 42908, + 
"bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..12a4f886771706f4fe51a8d4b8459f96a64231aa GIT binary patch literal 5808 zcmWModpMJe1OD8`M0Mmg&QA)Ht{m4Pm*b>12SrCVCv9ijx6QEQsU!O&ccqxMY4&Ye ziax2Ka)}+e)S=TkNvUL%=Dtx%(s7Gl@AJIxAMf+L|Gw`FC^_Yk5m)yD*G@S<1C$@hSU5%OOtg3s1QoSd6nt!6zY72=?$y~W8GJ@IO^iq)J zj*+|gJuKG{<3@EX$E%o~MM^~B<<#{i0mBh0{PkcHFwCd6*QO;$dcE}me_Gp?lc+bH zVH!6S%&O8BcC;SconmmRqCdfAfcPsb(JV{T)KDgUMo|w-xq^uIVd0(&|2i~b zKPlO#&=WD0HCkjPs9)w^bu->EDnn)0v@iiA$wfaf^Zn$*U1;XfSMTuE!4y$Ow)I zQl4;68uA6s0xwI|er}tkMZg*(a17z>?*~))P7bdIjB{riGy z;pOK~{I0te)uzwcm=EBmfeF9ieeNkPTSEi9X@K{R`fT1G+rzZ;miR_9VnRGlvmWB6 zJ`XTA_h93V?&p>Hn>nPtoYulKK?N-G#Z8}C{Eg5b;eHQPf-8R=d}?of9F-cOUu)fW zj*7YPBw^3A2M#)*b~4|XB)5+E9GCQ%o^a1e`7;=Sa3VU7AKy7%tR=zBCJ-;$B?({| z*7>%(2HLq-=IxyBtA0-qupcjct?~Yss7hOD8J7bZ%_-&1rS|4ID%i9eFtGEX3QD`k zQ)jb&5@WL_RI_S*K|q0e%OBg?vQJjOC9)gQ>j|Tsa#8yQTGYDD?`jP13l=s=(23d< zjes4$yzpE&^5Wt)v7TDGEN+gQqtZxkJ^$`mJ!}o%j?B8%N?4G=j+jtQSHiTk^2(vp zs9tSJQSAhq)TD>+*UN2O;oegEjTMD}v1iwfpiv_PM4%M@D6;Oe>Xs==J5eOy#gR2* zyZu>U^%IsIzGniX{>8d$(9Z|U8&H}5hZL}#R{A1mTx-oCF?1nE{8XrEkzDX6};VR49tRmJ?wt8(f(hCJwFZa^W$du6`$2CHF zUjE&AWCwzGk#QzP1Vd>`7HQ4xG9k!TdpGZ#0UO&pMCe){y-d+G9(|{48^I~ewl4gq zLA8to3Wpj7=-ATkYWvu`i>15lw}tMINLUh3Z4CjN=*qzQ*5`)(L;J6lZRiBKUnqVq zoQIzBU}063gtDTH%&$k_r->j&M&BZ8(cSF-D&+YXLZXcXML*-hMEq=J4g=03#FK{0 
zX5l!uL6+bWy=0*pCk|`d7<=$xFcE~B@4=?@(2HuF+1E+Xz$wW72y7{g|F29$5)pdV zb=wP8QET2YcsWW+9F1yD?=zDX4Ggt?v<|n35gkmTX>*JTMTWutGLaWu+OECu@*rkz z->Mh=o+voQz&1m_Mn8fwRZ4+1X%}VEC?i(Syfz^&-JF(7iuiSa;Y}FSVNGeYw;x{{ z2=bp1U|`VOd`MBz4w0Sc2||m>A7jN`cYc7hN5dQSY_%MNPkzvNp+(hGl2ez1xt8tM zN{JZ@iD0ZE^d}}RF4o_sG-WgjwrKP3yYBs?)GunYK-bs=@Y?FYRJE@rM}8fbDJN<< zYcesO)i_y`ezM|;a5p^7dMsVH>a1t|Wr!;w+gr3fK0?>Iu4ANwZ*?rm@VtW4jxrsV z`CF>2_`Y*j1hh@RXhz%ZKbvRQ0+v>3VLB>P>n+6*!Kov^a1m=gDa{3h97&V@E}6!%a~vL5eO`6 z5O2)fOlAben5lz!YHL^`R|UDH{c29J>A&$8>;S|>Yir@ z3im9t@&U?f|4wgyJhYMV&S)9h^ZxADO8OQ`p})|7kj%3>T+w;R9yJer7zOKLk;|_X z!bW6DCR4&^XUl>xaR;7{V@-yV74&#@b=y)j-ur!)Jfq6)l4+)*V;R(O%9@)7AIMR!PMkHW}zz8z~oQ zk+BLcf-w(HkFCeWRa|2A26PJ<}KvJ&6b(JoRR3GFxNZD-k#_PMbxe4_{RdfYOQHw3D4YS_Zn# z{2V3(GZxLAqTxw+d=VxTSEa`gLe?@x|A;WCsA!z$6TIq&86wi+;-0Ua7Ap()RP-$% zNNOwMx>({9D5&j{py(9Gb(Ck**vLLf6%&I7-*!?Q>$Q2)e|I%TPq6#urRa2{d47Kue!j=Wq5*oEnL%d z!)CpCc)k;_+1?WPED5EZWXM%j4RWtV81)0VROycr_sAD76-op*zqRHvk7O0>wu^(0 zh~Y9f6$C=!VfoL-CO1?33o&5InwbOY3u!Pf&avh(@dKtx!m9O1#ZfcMV+C+g6Nt)0 z@HZdrn_E!TH+GlnVs@@4>=^U96pzMmq zb|&Wrguc;_E?dylK(z6S;TY^_!aMIdZFAM*FEV&kbJniv=N)s$BK7K^ z*%Df*`<7hSq)G`nSJj{z1-E_ z-~&5n$E~SGvg2OXzVbP+uL<>u7ofHqoaHT37Zc=ld6U^&{p}6SaXY^5Wjf zQ{8$$Cloie=MFK>vCamU4v7uuRuW40^lz);y-y^Gjh<(WQyjZ&?rHmk0=8iu4Az(g z@@9TTjE174gS*E9kzU%_=R#EM#S1gYx?4$E43o2@ACtv}sc)Yo#?6`wdUf{h^O~tZ zOfy?;&Y$+0hK{~(=8i2h5HC_>_1H74E35LmeW?p!XhFdFNg1Pt`Wa5gP&jxQ5dCv81z>Mc1qUm@(3 zc{#wtcwGn4ZNo#P4LqV-)qQv^C?mWpQ4t&FHiJ@U2#~DM#d>KahXF zCBd^S7Mlo72e-P@ThKBi%)T%DyH5a=?;$Dg#YOBuda>xba?<>|4mRtIs9tj!_4H7y zPv%-52Tk&T658sE73aYGYI9+87Ty}Oq(@~B`Sv5ywh`NII@nvT(q&9DODUDy5WIBBX;?TS2zhFFS0 zLe5N}y0QR`z&$qbt*upCg1P-~F!Pm=XXE*g?`81CN|PT$-ah$q5mWd&nj}WGyb3IT z2p5Ri0OMkz9tW-7eM@i4m36%Mm;?uhGSnFtj%tpm?ncpbD`$iaaG<&P!Q#aEDgQ$X zgLE8I;>`$5Vex@|GMHO_PPMmhnboto4*6vg#qZ=A^3z{Qi4zo=5?O@OLI#5TqPH4< zg&ox7d-{1x&Q4bps;1^~hu!=v+LxFy`QE>-IZR$1k_?ib4T66Q#H%GDmljg?;GsSA zVzj*L`xq;ieJyV0e*NSeK3&+~(}oFzxBmv9Yx>$unkz#=tn!R6q+eZzHH1QFZH0gx 
zq^`=}A}dsZJT0bnZyHJMjl{=5I|5+xh*dBCUK=DTeWcmefwg*su%sl91hc0$1?cSV z2B)0Jj73*E%%;l?=ZdGVz7S6nXA}e~rI{tDQeU#1uPM-ijEku!Jwq_lH3=lDPV&5% z4uCN?X8(FkoGQ;TCnkF$kakq54jkSsCNNfv1Vk8?f;QcAcu*+!w?! zqcl$UzSIK@tBHcwRDY|8`i1QTL?+=#S0z-$@)$JJW?n@1gBWcxAb z#flb4|MD(fQ6M`IqE|Iv*f%%WcN>;vwP~`%#so?@_HH&e&em)I#5)T#I;`e~YZ~rB ztM*3~i!wNFt7FBx2zwzEkf6i|&u-ty{eUjh6P@rx7@L&&D*NvExN-GH(}=jamq@Bh zkm!V82!riQqGG)O77Qv}S)NfpC)o)SomsVurBhxyH<(+GRH8~SyrNg(VBBwnf=3PX ztHmEl6<%WQ#BuJnmvX9XZ_A?smx$uqYoZgCT}0V!jTCQ-B@TdGvGEi(W|vcC?-^0b zm|ummMVES$7lAR3HbAww$AhQsXFwjNEw7a+#Y#`{czttjuZI-CEQg4VC62;xTFz7MDmQS-x89 zfM;cnfbKAU6?=!=3=Exe-A5^`osq?G>bzM?Pe;qF&Ra)6;mGHNu4Ct!0_6wY&j=6+w>=8x$?KE*6m{HlF{;JDdNc^99 zAiS~|X1MX(O`vMKnfIvusv+`3@m=YX-eYqU?uN}%P`nFS2iTsW?#S>4W~X}vx6l{v zxAC86=&U??KmUI1VPR!+MDvj&uVV7G*d~DxFsJwX!g}&Yi#%tDA)n(JTqvyLC417>*IoX43H~p+9XBh=<19v#)@IbB zBF6ExW2%hxq@sRbQv*K8l+~#wAWad&Gk|C(0%XEfjS$gMn8gLx;Aq&P7OV@Bx#Pj& zG$%jKc0IB6{bU86kl6Idi5w|7EuQezd4e*LY0+x;*N1#AQT@pN7~f?zS+Q~iv+_|( ztMse*Kja3d)>k>TbaZ1_Y`)y;HPNm^?j(~i5VU>Yoxzs*;_>_%Dku7ZMM-_mg&V%? 
zz#5uealTBBPTL**IKVgPmRk;CG{A_$j&6NiI`YAkox5bUwS^CQ3Vv^Ht$#I!c{*laBR za1n?1Th^3C*|hbujJ!q$gM#kZw&(D%#>>6WYdu<{UlN|_pm((SK8rfKraX;{3$P8@ zd$WfD(T{eH6<)vVV{^&q0My_TV0dQ<*r2}zm~L7EFilIq=F}ws`_0?!xdeE8 lqvx?DfN*;W_*rKO2(4WL!oTGXl`R2>(Mv#-XbB*O{vSqIJKF#N literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/filtered.hsps.txt b/tests/test_data/outputs/extract/G5/filtered.hsps.txt new file mode 100755 index 0000000..dfc5707 --- /dev/null +++ b/tests/test_data/outputs/extract/G5/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 11 112 103 1 97.087 100 100 plus 1.85e-46 172 locus_1 11 112 True True True False False False False True False False False +9 0 762 22872 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False 
True False False False +10 0 858 22872 0 857 897 1754 858 19 97.786 100 100 plus 0.0 1480 locus_11 897 1754 True True True False False False False True False False False +11 0 972 22872 0 971 1766 2737 972 0 100.0 100 100 plus 0.0 1796 locus_12 1766 2737 True True True False False False False True False False False +12 0 1098 22872 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +13 0 1281 22872 0 1280 3858 5138 1281 11 99.141 100 100 plus 0.0 2305 locus_14 3858 5138 True True True False False False False True False False False +14 0 1434 22872 0 1433 5150 6583 1434 0 100.0 100 100 plus 0.0 2649 locus_15 5150 6583 True True True False False False False True False False False +15 0 1464 22872 0 1463 6595 8058 1464 15 98.975 100 100 plus 0.0 2621 locus_16 6595 8058 True True True False False False False True False False False +16 0 1836 22872 0 1835 8070 9905 1836 0 100.0 100 100 plus 0.0 3391 locus_17 8070 9905 True True True False False False False True False False False +17 0 1914 22872 0 1913 9917 11830 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9917 11830 True True True False False False False True False False False +18 0 2037 22872 0 2036 11842 13878 2037 16 99.215 100 100 plus 0.0 3674 locus_19 11842 13878 True True True False False False False True False False False +1 0 285 22872 0 284 13890 14174 285 17 94.035 100 100 plus 2.3600000000000003e-124 433 locus_2 13890 14174 True True True False False False False True False False False +19 0 4935 22872 0 4934 14186 19120 4935 0 100.0 100 100 plus 0.0 9114 locus_20 14186 19120 True True True False False False False True False False False +2 0 327 22872 0 326 19132 19458 327 0 100.0 100 100 plus 5.47e-176 604 locus_3 19132 19458 True True True False False False False True False False False +3 0 417 22872 0 416 19470 19886 417 11 97.362 100 100 plus 0.0 710 locus_4 19470 19886 True True True False False False False True False 
False False +4 0 444 22872 0 443 19898 20341 444 15 96.622 100 100 plus 0.0 737 locus_5 19898 20341 True True True False False False False True False False False +5 0 543 22872 0 542 20353 20895 543 0 100.0 100 100 plus 0.0 1003 locus_6 20353 20895 True True True False False False False True False False False +6 0 606 22872 0 605 20907 21512 606 15 97.525 100 100 plus 0.0 1037 locus_7 20907 21512 True True True False False False False True False False False +7 0 642 22872 0 641 21524 22165 642 0 100.0 100 100 plus 0.0 1186 locus_8 21524 22165 True True True False False False False True False False False +8 0 684 22872 0 683 22177 22860 684 0 100.0 100 100 plus 0.0 1264 locus_9 22177 22860 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G5/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G5/processed.extracted.seqs.fasta new file mode 100755 index 0000000..dcf41b0 --- /dev/null +++ b/tests/test_data/outputs/extract/G5/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G5/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G5/raw.extracted.seqs.fasta new file mode 100755 index 0000000..dcf41b0 --- /dev/null +++ b/tests/test_data/outputs/extract/G5/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G5/seq_data.txt b/tests/test_data/outputs/extract/G5/seq_data.txt new file mode 100755 index 0000000..8090c2b --- /dev/null +++ b/tests/test_data/outputs/extract/G5/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 97.087 100 172 False True True False False False False False False atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 897 1755 897 1754 97.786 100 1480 False True True False False False False False False 
gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 1766 2738 1766 2737 100.0 100 1796 False True True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_13 
12 1098 2749 3847 2749 3846 100.0 100 2028 False True True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 3858 5139 3858 5138 99.141 100 2305 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 5150 6584 5150 6583 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 6595 8059 6595 8058 98.975 100 2621 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 8070 9906 8070 9905 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 9917 11831 9917 11830 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 11842 13879 11842 13878 99.215 100 
3674 False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtga
aattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 13890 14175 13890 14174 94.035 100 433 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 14186 19121 14186 19120 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccga
ccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgt
ccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 19132 19459 19132 19458 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 19470 19887 19470 19886 97.362 100 710 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 19898 20342 19898 20341 96.622 100 737 False True True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 20353 20896 20353 20895 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 20907 21513 20907 21512 97.525 100 1037 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 21524 22166 21524 22165 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 22177 22861 22177 22860 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G6/blast/hsps.txt b/tests/test_data/outputs/extract/G6/blast/hsps.txt new file mode 100755 index 0000000..6dfa6df --- /dev/null +++ b/tests/test_data/outputs/extract/G6/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 12 113 102 2 98.039 100 100 plus 3.98e-48 178 +1 0 285 22872 1 285 13891 14175 285 0 100.000 100 100 plus 1.05e-152 527 +2 0 327 22872 1 327 19133 19459 327 0 100.000 100 100 plus 5.47e-176 604 +3 0 417 22872 1 417 19471 19887 417 0 100.000 100 
100 plus 0.0 771 +4 0 444 22872 1 444 19899 20342 444 0 100.000 100 100 plus 0.0 821 +5 0 543 22872 1 543 20354 20896 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 22872 1 606 20908 21513 606 0 100.000 100 100 plus 0.0 1120 +7 0 642 22872 1 642 21525 22166 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 22872 1 684 22178 22861 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 22872 1 762 125 886 762 0 100.000 100 100 plus 0.0 1408 +10 0 858 22872 1 858 898 1755 858 0 100.000 100 100 plus 0.0 1585 +11 0 972 22872 1 972 1767 2738 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 22872 1 1098 2750 3847 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 22872 1 1281 3859 5139 1281 0 100.000 100 100 plus 0.0 2366 +14 0 1434 22872 1 1434 5151 6584 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 22872 1 1464 6596 8059 1464 0 100.000 100 100 plus 0.0 2704 +16 0 1836 22872 1 1836 8071 9906 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 22872 1 1914 9918 11831 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 22872 1 2037 11843 13879 2037 0 100.000 100 100 plus 0.0 3762 +19 0 4935 22872 1 4935 14187 19121 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta new file mode 100755 index 0000000..1022776 --- /dev/null +++ b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaannnnnnnnnnngtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagag
aaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggt
agccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacg
cttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctc
ggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactact
acgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcg
gcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaa
gcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgata
cgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggc
aatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaannnnnnnnnnnatgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgc
tcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgca
ttgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%so>}flwpjBgjp0B0cAOW#DSVN05MQZ GYy$w$10Ko% literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..69df09f --- /dev/null +++ b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G6/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:09:00", + "number-of-volumes": 1, + "bytes-total": 42908, + 
"bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..4deaf6e87ece329d81bc6ecff532043c75b7fe31 GIT binary patch literal 5808 zcmWModpwhi<9^-73U%Z*&XK~TE5~*2Qfd>5b7XVUcDB9S3_E^xWUu6!QZ8%L+Pi5f zdPPI!5<7CKL&y0hrIJya`$j2A$1Q$6pU?CB@q9kdzt8gk3V-YO)FYgCnT!HQe8Z?B zY~@8y%ZK-iua%WQY9Nz1fp791YR>KFFzJau1JPtiE-sGV-4S=0IjF;DN&c8W$tZcp z7~DLYmgh-zOEL1iE;RLx@1Eiq9yg2!`|_4KeYNf4`YcE^+W2N(mO>X(ORDPLmd}@c zX8(EG;soA%)Pif*cm_4efg0v8&;@)suuiM^HgU9yVRrQVCdRhk1yz-9uyv^ zIO&==gsy#0tnsoz-K847191X0}p^w%(< zus1|oR&d;ogsl!X9NB&8*|ApFNrvmIv?W4w5(MG7m4@V9LI+fNg!He7zg3$<8m=wZ zPl2#HFHYfo(`&!$9+SLKxtBY;igBCU9a8ZK@tHnf{%`TZWW~LBZGMr}nX9}1SWqs! 
z`tq6AdEczc_yrsD5&S$b;XAzFHQ8xvh`%Qd@Z3?CRq*HbP|chrp8kx85QkG6Lfn)W z{-&mGY`p%%oDx41yVO_H8h9oshlSp_>2r&}6Z*qlAAkyQEc-4{ zF_)gk@0oVPK?hY1rW+Gwmf>Gw6Q9uIA2=uuf#C=zyyL`)o#RCsV$5tj@v==KAC_Vr z@4Bj?ojy`e$24!%2Lhk{WZ_%2=l_Vx)RpFO8K735R_t7AYnr2ijk^Io8!sxqxRX3} zK69T4n>nGJRq=J=AKZV}Ml@K1+5Sj4<1VAHBG%tE#^mJ@5yvZD|a2qd`Jv!j2xPQADfQ1OD{jblg;k|ppTF*9* z3LE+2$FhBI))adFd8#dcZfngtRn>!GH=qp(qa89~+a+4$`pxgF*FEGfY!IUpG%0F6 zJ8pU5g<$060ug)3f@E1il-Qd8dW2AcgHQA?nWfX-WC@BWF+p zn&QG5kWFgT!S~ypZCK&nQTUD(27@u@*N>o4BLsx65bPCN_E~gG<;5K+67cfqnt{!M zOt9)H%Ld;wfl>Wx**WOzg)JBuG5rt8V>_($Ma;%gA@%qQx`VqdC*C0kKPiy+Pl&w+ zo3?4VImoo5-(}hiUp4pn<+x#^(%;QlnzLC+tfg%A*s8A+0S1n(r}ObQ9>B*~eiHJ59IKx@t21sC+#*xo)ur(x7GMct74UfVjHlb>ao|8Ko= z83_;!)eq3I#obl5G4~gXciC1oC_22v*pa!DYfSI6Key3 zelvUw40@Ul$;;XxvI9L{U^cloM$~!lXGn82tUhkVF>$dmepbcFqmi&#t6$$O&%F}g$jy9h10%p=s~uC>wwe_2ZCt9DsNt+h zMR-=jWJ21hvZsRG@HFd*Wc{k6j^)=Ou9$3V*81cqUF(*Xz81H|{z|(0b)06T@o=@D zxzd8?J$Ic?+w`k?w9WRUX=V*zZjl= zCT{EL<_2L(g~9D1@@cB^d|qyw!)xbimGnWarBrx$CFOr%#~;hxh>+?U=C}Td6tJXT zv^mq|IjD8jfSH52BXzklLo<66FzJuAC3778J1*t^5fy_mS^?-+#cTYX9L&4UfOMPO zI-Y$7y8zP8-&Hz~w_)|T3&8NF;62t~AJL$$AW|V*{--_pEqKy#PxnMyrLudcs^|H^ z{5{L8T!6CLztfW!2W@1$*I$PAd^rEDoW6yU?b&fW z3tvssX&|-dxY8RL2%m9nmj|tS-SfZh3zxwa%k~==Xs>3c>8gihi^L)~t8{dYm4u5l zOIdlB!RSXr6C%Yavn<+76AIhe0;a8)*XTHe2>(&$?{SU$?>gLbH0t2*jaIyMYPzti z_$Og^-{=>lo@IJR$2b~9Pa=Xx&pesxjFxEEas&>D)nw7=!#5NIpk$&q^;AazEgfBJ zdI1xR8H?gh(eNZZz7P|FtI%NxAWNyde?$;iSU6tb6}0M$86whRV_&SD5h?Qbl=Uqj zNJnYEtu@QaZ3MK{(zU!pg4XrK$xgD)6tdH7_Cl#|c=*lTO zSO$cYHT?Egtn*g%U0;7br4Sd3Qz>C}#Zx4aTO$nxurRaC^-K8|kJ^CAWq5*oGfdrB zX=P{{mg`W^WNQw55r@!DF=Wb$dYQ)}jQSZ|D)&Q)dSr`N^Tqtz-&=FJfMgNmvWtTc zkLEHrMn4N}%9b+C>!!yJ8;wBFK?L0nR8L0asrOsS>)48kmGBwiDG!y)g zzvvGsu^^gPg&iU>9!$RCyN><9nfT(3K~}I62?nz@j;wQ=zpr@Vx@Wy1!UVTn$6bBc z8JtQ8UD=N=Sx{F)wDGdxXzXbGd(SydQ{|JdQg~E--lpQvjydQBS?`0=jXDGA-ORm` zA)i-35(#|&=G*P{vfaGbBv!fs@bwBJqE&NstIr>m!GXmKDhT3c0sZ7MT@i}^J5QVE)f|FNmjWaugEZe zjOydedYsCVd|oAB=~GNznmgXBNZDk~gG_ijO#(W#J4ct7TZ&|B-L2c4l=89x#{{Rc 
zsQ7n%P2(fcNO9WQdg!~S6ptm z4z%Mo#;l4SXR?R?T23W-{;y%j$TE|RIB~?z(m*dB$ab_8T-V%Y1I3BViZd~z441s%?s=*I;wCdQZK|}k~yVZfO2*$LZ=p* zy+SK?-E!7Bv0O|JemnQo4=%HNK%6;?)vjo1cm#x>sEgtAt=i6GSjdmF3&&H5I%ijV zgAVST9k-CGM-r4I zFhcH@wAS%7RP8t(4QbomS-S*pgZgTT9>>zkunArz(E5_D>>pp^Rb9+yMk+mPEBmHK2OC74esX*lI=UK9%y=n0M?;y4Az(! z@??HP^oOFNg1W~7Dtl?UF9fKV%a>-5^>-388Af`fy-A|{ly^^KV`okIy;?r|J!Z-f znu3KhT+UyL+_D3L@`p zyip<`g>BIU9`+=|JLeb~wx=Mh9B49UE%5bcS0%yk&xGmc@`Zcy%{al$L#;gyZ;nGc^{D!yu6GZNGlTFQcRlO(!yq*6V|D(p`IOS z@yb~1hgzN3oP~D=EQvliqA@9rcu4iOlLk0c z)OsOzl)cjIMBSes2@_jZj&|h@B=pguJANL3oYOFxG3gc{yz7>)4JTDOs#%r;(GYVH zNXVWEP?hJQ5xB<+zO%JrOAxpJEoQzPa&Nf!>4Ox$TyC^C_}$a5mofQoqDUfC^Xq`p z$8esA4KOa}>u}Jj-FI}hTwh-h7aebBSAsg{#8J)>Roy81+431dJse;vdbBukamw$A zTrUmB6nioPl36@pzZB+{UQqh9ty z-(Wix`GIcElB2_Qxw5fo+-^57llC=wOy=|VO}oh(L*hZw^Fi<*zG(G|(5ab}HF#tX zy$CJq{4v_XX@9eesc%0yn@1P)_q1XH;O)Ny=<2>!qo(o@5UV)n4e3^uVAnyxwAOsU z22xezZjt6I!2%7YW^XD<<%z^aLp%Io@`yz*{y{4wEbdZoYsXr2AuI{WE#BmrRUSI4 zyWSx?B7M=B4zua9b#q12H(rXSi8FEnmD0rGSE#O<&sXPZKn6wBQ|`f->FRhARV!&; zLZC3g?aPV0I^cHORnO&>wHuVNE z%t*DvgRgY}!(t-u4b{&gyl!DT0g;M1l2tJkF+YwGGd1Wk9bBM`f={3cT^ND6+sT!K zC@xif?h`9xqQ;X}rovw@Tv8yK{te4?IJi(~pM?Y0pxEsN-$c;l^}54Drlyex8Ckvz zdXc;t(!I7zTNuC&fan!Xm-f#M_T7c0nXT$fkpY1chP|J~jkPuz0P&7|wHB+X{-&CH z*rKhgY*7lwZnZCaA8spv{Nok)pxNykxgXIbI>M9g2xF5XS83ZF7dx)nXdE6p_XbH>Q&9Li42=Dqkk?gD zzftswROTV#PMqLwdnKbveVV)SoWhIlt_e?;cM_#{)e^ilmN)=%MFvyY=v@xwz2}6< zW4>jIW^L;2f^ZC?chj3up6n!y-$rH`=83&odrrZ5m?5tZ&5&M(POoVWO#|*hE53~o zje9qawKCN*QwSSn~y%FeR3k~CMP zanQXyn@_hJzk$6+ZUTl*yAMZJA_@q2GfF%;$;*QQ>PP-c)_Vmw)YZ&C-0dSj!vtE{bF8nGFm3(9F*p$5WVE;p_ z{^6BR!_5m<^mUO{3w*iR&8c-#zgm9!K->E$>kx;+raN*74eKme*Jt1})+nvj*B>Sj z@2CIKvDPyQp7=KYnpAFKqj80|C3!$wrE!$+7Ib`#H0+tqc*Q~3|YOFDm>8gVyl-VVh%k+p!`>8kcjPhfVsS9}M3 z>0vAHMY`5P*9X@RYmf8Go5Gt;MZb>D)nFS1{2$|>9uGQ9GprrGSXY-CH*@^paglY~ zi*AjBicx){3-B||3+6V!uU{x(ZwB#nSLDe`KcBmQagk^^&BdG?h=0IKWmn+J>>`!r>7L#QwM=>j( zG&G98iT*<>IW^vjsimJbhQ@TsEZz`p+GP$>F#|!{_T3w7o-Z2Dt)_CK9+_P!%Dz$Bmamkqp_)&T-RaSv$*3_X2W*xI>wfq|Bf(>Krx 
z0q0$Q?GXmPtCr610Y?~7w&pWyPvfpOzn@6A0RCLtQoZ40WU-8C>dAlfWH98yl@f#m z7<1vA>F>vNBw(s1Zd2h(Wqs}iH6e$<_3 zCa{`^Jgs&UDT3USOdM(nmD@qVsDFf1bO2KjEk*nWW6W|=+)L}|B@N{O7>)%+tNDTv zg27thyLM(0$Z#)#SiEy!H!WgV^otO}#p6U_;E`(c-PHtJtl;9vZv`5gG_|5z8hWfY z7Ja;o!}~6)OCqgW`&s%PBZGl~_pIBpd02zx-WN4)Em5xs&$ZAyTD_k~9$QnK!NvMp z2m9RaVL*2sQ z&x5S(1#=yD9FLm(Hs`T}85 zmFUBM@#gx}F5o*%mcGk(CN2S5NlO4~a0ys?-~+wHyt nxP7Pl@g;z8cL_M8wFHFJECFHPb4N;+fM3u{K%{U9Acp)utK~d8 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/filtered.hsps.txt b/tests/test_data/outputs/extract/G6/filtered.hsps.txt new file mode 100755 index 0000000..ca3b2cb --- /dev/null +++ b/tests/test_data/outputs/extract/G6/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 11 112 102 2 98.039 100 100 plus 3.98e-48 178 locus_1 11 
112 True True True False False False False True False False False +9 0 762 22872 0 761 124 885 762 0 100.0 100 100 plus 0.0 1408 locus_10 124 885 True True True False False False False True False False False +10 0 858 22872 0 857 897 1754 858 0 100.0 100 100 plus 0.0 1585 locus_11 897 1754 True True True False False False False True False False False +11 0 972 22872 0 971 1766 2737 972 0 100.0 100 100 plus 0.0 1796 locus_12 1766 2737 True True True False False False False True False False False +12 0 1098 22872 0 1097 2749 3846 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2749 3846 True True True False False False False True False False False +13 0 1281 22872 0 1280 3858 5138 1281 0 100.0 100 100 plus 0.0 2366 locus_14 3858 5138 True True True False False False False True False False False +14 0 1434 22872 0 1433 5150 6583 1434 0 100.0 100 100 plus 0.0 2649 locus_15 5150 6583 True True True False False False False True False False False +15 0 1464 22872 0 1463 6595 8058 1464 0 100.0 100 100 plus 0.0 2704 locus_16 6595 8058 True True True False False False False True False False False +16 0 1836 22872 0 1835 8070 9905 1836 0 100.0 100 100 plus 0.0 3391 locus_17 8070 9905 True True True False False False False True False False False +17 0 1914 22872 0 1913 9917 11830 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9917 11830 True True True False False False False True False False False +18 0 2037 22872 0 2036 11842 13878 2037 0 100.0 100 100 plus 0.0 3762 locus_19 11842 13878 True True True False False False False True False False False +1 0 285 22872 0 284 13890 14174 285 0 100.0 100 100 plus 1.05e-152 527 locus_2 13890 14174 True True True False False False False True False False False +19 0 4935 22872 0 4934 14186 19120 4935 0 100.0 100 100 plus 0.0 9114 locus_20 14186 19120 True True True False False False False True False False False +2 0 327 22872 0 326 19132 19458 327 0 100.0 100 100 plus 5.47e-176 604 locus_3 19132 19458 True True True False False False False True 
False False False +3 0 417 22872 0 416 19470 19886 417 0 100.0 100 100 plus 0.0 771 locus_4 19470 19886 True True True False False False False True False False False +4 0 444 22872 0 443 19898 20341 444 0 100.0 100 100 plus 0.0 821 locus_5 19898 20341 True True True False False False False True False False False +5 0 543 22872 0 542 20353 20895 543 0 100.0 100 100 plus 0.0 1003 locus_6 20353 20895 True True True False False False False True False False False +6 0 606 22872 0 605 20907 21512 606 0 100.0 100 100 plus 0.0 1120 locus_7 20907 21512 True True True False False False False True False False False +7 0 642 22872 0 641 21524 22165 642 0 100.0 100 100 plus 0.0 1186 locus_8 21524 22165 True True True False False False False True False False False +8 0 684 22872 0 683 22177 22860 684 0 100.0 100 100 plus 0.0 1264 locus_9 22177 22860 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G6/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G6/processed.extracted.seqs.fasta new file mode 100755 index 0000000..8c3c2ef --- /dev/null +++ b/tests/test_data/outputs/extract/G6/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G6/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G6/raw.extracted.seqs.fasta new file mode 100755 index 0000000..8c3c2ef --- /dev/null +++ b/tests/test_data/outputs/extract/G6/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G6/seq_data.txt b/tests/test_data/outputs/extract/G6/seq_data.txt new file mode 100755 index 0000000..69dbe6d --- /dev/null +++ b/tests/test_data/outputs/extract/G6/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 98.039 100 178 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 124 886 124 885 100.0 100 1408 False True True False False False False False False atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa atg taa True True True +2 0 locus_11 10 858 897 1755 897 1754 100.0 100 1585 False True True False False False False False False 
gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 1766 2738 1766 2737 100.0 100 1796 False True True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 locus_13 
12 1098 2749 3847 2749 3846 100.0 100 2028 False True True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 3858 5139 3858 5138 100.0 100 2366 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 5150 6584 5150 6583 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 6595 8059 6595 8058 100.0 100 2704 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 8070 9906 8070 9905 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 9917 11831 9917 11830 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 11842 13879 11842 13878 100.0 100 3762 
False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 13890 14175 13890 14174 100.0 100 527 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 14186 19121 14186 19120 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggct
tgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcc
tggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 19132 19459 19132 19458 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 19470 19887 19470 19886 100.0 100 771 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 19898 20342 19898 20341 100.0 100 821 False True True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 20353 20896 20353 20895 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 20907 21513 20907 21512 100.0 100 1120 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 21524 22166 21524 22165 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 22177 22861 22177 22860 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G7/blast/hsps.txt b/tests/test_data/outputs/extract/G7/blast/hsps.txt new file mode 100755 index 0000000..7f62efa --- /dev/null +++ b/tests/test_data/outputs/extract/G7/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 22861 22760 102 0 100.000 100 100 minus 1.84e-51 189 +1 0 285 22872 1 285 8982 8698 285 17 94.035 100 100 minus 2.36e-124 433 +2 0 327 22872 1 327 3740 3414 327 0 100.000 100 100 minus 5.47e-176 604 +3 0 417 22872 1 417 3402 2986 417 11 97.362 100 
100 minus 0.0 710 +4 0 444 22872 1 444 2974 2531 444 15 96.622 100 100 minus 0.0 737 +5 0 543 22872 1 543 2519 1977 543 0 100.000 100 100 minus 0.0 1003 +6 0 606 22872 1 606 1965 1360 606 15 97.525 100 100 minus 0.0 1037 +7 0 642 22872 1 642 1348 707 642 0 100.000 100 100 minus 0.0 1186 +8 0 684 22872 1 684 695 12 684 0 100.000 100 100 minus 0.0 1264 +9 0 762 22872 3 762 22746 21987 760 0 100.000 99 99 minus 0.0 1404 +10 0 858 22872 1 858 21975 21118 858 19 97.786 100 100 minus 0.0 1480 +11 0 972 22872 1 972 21106 20135 972 0 100.000 100 100 minus 0.0 1796 +12 0 1098 22872 1 1098 20123 19026 1098 0 100.000 100 100 minus 0.0 2028 +13 0 1281 22872 1 1281 19014 17734 1281 11 99.141 100 100 minus 0.0 2305 +14 0 1434 22872 1 1434 17722 16289 1434 0 100.000 100 100 minus 0.0 2649 +15 0 1464 22872 1 1464 16277 14814 1464 15 98.975 100 100 minus 0.0 2621 +16 0 1836 22872 1 1836 14802 12967 1836 0 100.000 100 100 minus 0.0 3391 +17 0 1914 22872 1 1914 12955 11042 1914 0 100.000 100 100 minus 0.0 3535 +18 0 2037 22872 1 2037 11030 8994 2037 16 99.215 100 100 minus 0.0 3674 +19 0 4935 22872 1 4935 8686 3752 4935 0 100.000 100 100 minus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta new file mode 100755 index 0000000..a1fcacd --- /dev/null +++ b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatnnnnnnnnnnntcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatnnnnnnnnnnnctagccgggaacaaattcaccgtctaaaaagagttttccgttttccgcatacataacgagcgcattaagatagtccggatcaaacttcacgccgcgcttattttttgcaatatacgtcaaacaatgattatgggtaaaaatgactatatttttattctgcgactttttcagtaacgtattgattgaagcataaataccgctgccgcaatccatcatttttttatccgccgtaagcgacctgcctgcggaaaaccaggttgccgactggatggtgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctgacaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctccaggcatgttgtgaggccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacnnnnnnnnnnnttaaaccgcttgttcaaggctga
aactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatnnnnnnnnnnnttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgagttgctcttcatcttctttcgaacgcatgtattgtggatgttcctggaagaaggtaagcgcctgttctttggtttgtttatatttttcgcaaaaaatgcttgagctgattgcgctattttttgatgcggaattatcagcgttctggtttgataatgatttattcttcgcaaggtctgacggcacatacggaagagactgacactcatcaatactatttgcgttggccagttgctctttctgagcgccaggttgctgtaccggtttgctcacggaggaagggaggggcacctgggcacagccgatcagtaaaaagacaggaagacagctataaaattttttcatnnnnnnnnnnnttagtgcgcttttacctgcctgaaccagtaattttccattttcgttatccatttcccctttttatttttcggtattacgccagcccaaagtaattgcagctgtcggttataggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtccttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactgtacgttcagtttcacacatttttccccgcagccttctactggcggcaatattgtcatggtataaggcgggttatctgcagtggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggcggcagaaaatagcgcgagcaaataaaaaggtattagtttcagnnnnnnnnnnnttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatnnnnnnnnnnnttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgc
tctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattc
aatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgacc
ggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatnnnnnnnnnnnttagccctgatgcggcatcaattccgggtggccttgtaccggcggcttgttgctggtcagcgcggcttcatcagcctgaatgctgccggaattggccgcccatacgccttcatgcgtgtgggtgatctgctgatgctgcgcattcaaatcctcgcccattgccgcaatatgcgtgctttccgtaccgccgctgttagtcgcccaggggatgacgggatcgctggcaaatgccatgccggaaatcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatnnnnnnnnnnnttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggtaatttcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgggcaatagtttccgcgtcgccgtcgaactgccagttattgccggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcacgcataaagaacaggatctcgccgagcttgccggtctcctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgatctctgcgctgacgccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcag
aggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgttcacttttacgcctgcgttgcgcatggtatccagtttttgctgacaaacttccgtaccgccctgaatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtccagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacggccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgacggtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcaatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatnnnnnnnnnnnctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccag
atgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatnnnnnnnnnnnttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcg
accaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatnnnnnnnnnnnttattcctctttctgtgtgggatgctgtcggccaaaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtggggtatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgctgataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttagggtcgtgcggtatttgttataacgttcgcgctcgcttttgagcagttgattaaggttgcgcacaaggctggtcagctcacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgccagcgcctcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagcgttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaaccattccggttgaatgcttttaatcagccaggggatgttgcgctgcgtccataataatttg
cccgtttcatcgtaaatcagcgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgctgattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcaccagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatnnnnnnnnnnntcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacnnnnnnnnnnnctagatcacgtattcgatcaacgctggttcttgtttacagaggcgacgccagtcgacaatcggcattcgtacctgcagactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgc
ggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggatttcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagcagcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtttcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggtttcgagctgggtttcacccggaccgcgcaaaccaatcccgcctttctgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgttgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcggaagagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaannnnnnnnnnntcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccaggg
agaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatnnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnntcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccagacgcagcgtatgttcgctggactcgcgcatcagcaggccgtaaatcaatccgccaaccatggaatcgcctgcgccgacggtacttaccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccacagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgtgcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctttaagaccggcgactaacgcttcacggctactatcaaagataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgctgacgcagaccatatcgaactgacccagccagctcagggagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccggaaaagttgaagtcggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcactttggcaacgttaatgcctttgcccgccgcgtgcagacccgtggttttcaccaggttcacnnnnnnnnnnnttaccattgcgtg
ccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgcccnnnnnnnnnnnttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|%so>}flwpjBgjp0B0cAOW#DSVN05MQZ GYy$w$avsY7 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs new file mode 100755 index 0000000..264d3e7 --- /dev/null +++ b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "contigs.fasta", + "dbtype": "Nucleotide", + "db-version": 5, + "description": "locidex/extract/G7/blast_db/contigs.fasta", + "number-of-letters": 22872, + "number-of-sequences": 1, + "last-updated": "2024-06-10T11:09:00", + "number-of-volumes": 1, + "bytes-total": 42908, + 
"bytes-to-cache": 5952, + "files": [ + "contigs.fasta.ndb", + "contigs.fasta.nhr", + "contigs.fasta.nin", + "contigs.fasta.not", + "contigs.fasta.nsq", + "contigs.fasta.ntf", + "contigs.fasta.nto" + ] +} diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.not b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.not new file mode 100755 index 0000000000000000000000000000000000000000..d6562660b009ba390419e760e6e10a80c529e3d8 GIT binary patch literal 20 OcmZQ%fB;4)4Wa-5Gynqt literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nsq b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nsq new file mode 100755 index 0000000000000000000000000000000000000000..98c4be8b6b33fc26bdeb3da4a5be080375482568 GIT binary patch literal 5808 zcmWModpwhi;~uh9$U)0E)hSGI0 zi@&Tj6J~1j^lcWF8#{mXB=J>oRSQJ@tn4kHdannW?0l6!S#v`S;`FyZYd_=uH;vJ)VFx9A#rk-0_I_KeGmNMAO9o7npU*s712-$bm&3p~gkMcJ6 z%skX~%se@RK%1ue9Ub+10vpQNxonXBQQqXH$bw>!jXYeX|pT;MWnPmF!<|@6QR6LU33`qgZxr=Tid60fnEEQ zmH)8iC(O@E+jrZ?_vtFyfON(&&(2u+5vH4}w5;EsDx9*q;>|z46kd)tEjqll1!|;q z1ke<0zvS?{Bu|LI@ge{kx+{Op5@)~xW0g+85Ud`X?Ho3s9A8y#MD54Xon^i#%19A9 ziC)iMiT(k(erbc#aj|7I(JBA&7|~IR0~eC)KdFwF2Z|i4Cm$+50-Mlp4-h`Ne_p)X z*RWpGu4;O(9wRQP(80Y=4!y3(sz=htX1^|Ys=RbW*C!M|Y9{!bods{(@(!*n$v3_= z_wCBsIlj%=saLTKs>j~qz}9q3TeUubbkk;5dcw=P6Wza)jzOorrkz9Xo-EO9Ira5)a8Wzbn(&K%UyIW} zj3A}2=2dLz-)VIHufdHQ;}@9w+GO()0}s`e@60yp3(g99~-m6yf9=VTq_fxdS+SDY4 z3#14V_ySBa7nTf;qs;+d-F!)$`?2TQE~P@^#M1yxvFaS{d{woWIcIJ8woO(JP$<{! 
zNq#sY-9&-soMY(j1Wa4F=Zd1Q@YZNK_y=I`T#Lb^R%=>dia6kXrM2)!5No}kJ|n%S z>QAww0~3IYWmhfkv?Sb^t~k|k<1I90TY}7_!-g=g;s|^G{Kps90U>51HRTddq>r>I zk`hwuTE@VRD<%Pg9%R>gZYR0tI9K&Lm)(Z;{ zw9rC13y$E^mV{W&eSn{6{hp4xKa+ygg3 zS5TR}2@iIiRNOX17Hk}a@MJG6nhZeHlYQf9kvjMnJ+YBE&G?zirJVR70o<98r*y*n z7zJMg{lC1`RIPI7JA|Tss`-2R#0cWq9iIt8meKr#IhRoWi+}3F%wdgKUJ>nyr{wyX z3btGB!p{_fZ9RS!qt|66Om49RPbK;CP30fXo6b0%mWnZm_(0LB+g$0*Ja;n9NUK0gf0!Uvue5+*wCHAb5MnJ5*_h}SBR$ax-4|+$8u~*NPc=GQOh{-ME*rSY4fK-bLK;;9=e8qcRb?F8>ZrgOT?jdt6#~cQE9X zkr(-bJ78#8`yZXGkiolVpftdpB17O6DHRg&q|A?M4yLRse`r#iP8s>mhh7z$anDwGt4Heb8_eZn@<%(zxA7()@E<4`Ro-cPNEEQy@04gT`KH-EZ-$?kKJhfEdym7E!gAx2vAB|B0p9xOL(P7icy5cCnl8Uj)`_emb z?~m@P(Kii~72P-4IaN6|T!xxMW`4;_Pzq}F5^4!|v_JYd3?*@gw}T5)za>m5KPrfL zfj06E?}r1dwyiNm!F|T}ez)cxpgo$*aia`EIHO5c@s%5(>EMxVLEbI1(3J1sy=>1^ zWttT?$VfmhbZICABu{d2F~Lik z0+Yp=Q^}+SKD(i}#^aLUb}Vsea2| zx(y{Q>MXz-(8yTgL-vy5ZKzm*!$JlL2dXBv(UEr&ox%M;FW#c6{`uSFRIl1Pe%3}= z{htt5@Ma~5$qXFdpQbrX_b!>I)%x-;S5F~2XB@9>SCOq74%9xZ>{i&;gHCmBleH2J z%LP3k2tS4ATk`B?$~@E@`n#{D9PUWNPpJvFhCWTYzG-%tTO+({C0`2eo2$Z-IWES= zcUQ&f;&x|o`jpLvB3+k%56wAv;af3ZYp%H;ikbRf%GeRdM&0{+$;!>AHm(;k(ZmOq zk?AW`Nt_`=U$9yUs!cqA17vg~q#80x*&6kI2qnwtyCzJ=9=TWGS9Wtm<@pI)u4#K# z1e;#@=EJBv9nVgB5`&Qw^Zu;OLx>gq_WClGe{t?&4b@dQ1E?yF(-Y#|L*XHI)Cc3} zP2=}H=OCAIwL?-KUQ8&iQ{M&EOU23=6QQNHdMH#(qk5giaHWl6WS==4dNsiSGeW4*C5**Z z$$g-JiRR^FD&?*H$_V3;?a}d4Whbd*Csbl_ruaiII#fik@!qe6+>{q{B{BFj^N=g= zm3_tIb*%yEAQnu*emkxXEI)-DvOS|IT;)=BtB*==p1gKzCxXR-yb4l_ZTYP+&=()0 zyfUUb2s&>4tHkvU#~A_6yi_J+I)*&Ub*7m&O;ssCKThAGXgEv+hM`+9*_9iw#`x0< zQoo!wsMNZ#X)kv{x4zInmevjlI^V^jf1j*m-nP1oqXf`$)FPOs1lNl4ufQ1z7V-Ey zPGtvf0i)xRKOH>6Osfw3voDtYb6Q5)^})%lRHCb+gG*l2xU$djUfC8Q)WU%K4G?Ca zOADTD9HYx6mk8GJU;5dECz(;sFskpO24F$H&cK%R!oQ?Vah6%@m!umOew^Xr+ zkyWeH;5+Mgl~Y3qO%n|0vKpXcB^*R4V~4X1B+NCR`=Sd6Xj&(QC}V0NcdLZtu$fB+ z)G?fF8ds~PTvOx~P>oO2gGGX&5^|gy&GeCEhmAzcASdcFTTN_>4nW?<5Y!hpwLYIG zKY}_K9{g5^!}T>;i#-P)HFfLRlk|g6)D+2~MI_1+ivV zR%$F3j^Led5-Yfw;jIW{U1{#!!Wjzra+m;FiD2{Dy3YyRo6tMwxIFqV-DPw!=d?Pg 
z|EISjcKXPJAG#{n=PUoF7nIiNj<##V-ib6uv9H42eiu(38*tbhzSF}6Iv^G{NLREP z6f@Hhsk$f=o7Ue}oI2lZ(GDBYcYgQiw4Y+|>INJW!u5+QlbR_}b=qEOY@a)cyEOCO zw`4`+rOG)s+UnuOvA6$L3P(<8<;c3kr09TjTGhd~;R27v`Zov<079X!RLLKWbYvWM zUI$h%rk~VIeT*ck%ybAk`I3iX>q~F0tOt#HpW(iOUFwkM!(I|iIoD`?{@X|Tvgdiq zuoYU~`i1xKwSnfpjp)31f&(mC_Z{~fd@KznSn6|Kg%vJW^XEY#ro~1*UhcZuny4p0 zKWC!I*TtIlbV1^MPZhI#M~R12oOXXR;qy zuG-po3q5;1UwMk{UTWD-tH`W?GZu9VUOTWaxu(cb!m+7c+J071T%jC5p><<d0#YFC-?z%|zO^#SO5X64 z9GXKH4RY4)O15cRs*5so&IErvmnaYX*~*@fD=t-mD^sL^gb)}JFnx7AB2fEJW^zKg zo@Ue#T*Mz~lv+iCb~rV&LZ90kj|J{zaS%y&s3gt7&tY|GY#^C_Sv=@<&rz^^yDxJE zD;}f+I?1OEsDDC7jrsqhdz}~p)dZ> zMHofwropEXV8e@gPuZOU@hoz=7=-!BdzJ&gY6Z*LIeoU`>rAfML^x8v*hJ}~CLy>v z>v|y{VLcl0NKsIr$%ET>>uj3upg$A?0@(ix0*#lu@!BQij1oe!)N&8H(MNKI1=nm| zytOZ2`1C>V@m8MrkOs7n!DKF(ytUx<^wsX7oq^LidEGQ|#LWOgD};Rv|DA8)o{vsj zntyg9iX$-^Ii5%jNTp%MP7#l0E_qNlOrR(k1K874x)mB|6hd2}ik;<2+EkRylipBB zq)6T11sBxyUKqR}apEgCB>@Lw)|CCnH-0P}z24-Sp=UlXEsbe=U2JATjvXcutXN5< zxk;~CHKO(^4v6oncZ+K;#q>ZwBq$?@G!q}QM2p{Kf>?c_EB9CqoUzZf5M}p4Lt)f( zlYqR)I1BpE1RfKjrZ7Y)JKhAs+YKgtc%NDAQMGl$!n%G6CRW#goSDiv^5lUv_FHRm z3IsL?&`0(DoZ)sKvH0kv|F@+cMS*t=FP0l&Wp(n`cRr*L4)z&!VVE3AroV{1xsQ4A zwBNH$JK`0OSLo6jXXd)15SOl*_&-qEat>8lAK;^wApV)}A@HzqJ;B1cl&P4UuceneLeg7T3ek5py zIiGv3#Hz3Rf7WkIv!TN!1$nG3xEze72`RYe?)u7mCbv3yT|FEG4!vo#A*x`L>>s!9 z05@w!DgCPIco<6vg3%q+6WX^caaP|}{LAB`Pom7c44=fHc}5Db6A2cedNt=|P{9eb z#Fxdrloi0&>Ey0yKk+WX(ZmIi@{&^yK8$U^)bf#a-`2d*bOtn3zUO(QC9kf*KKhQR zU^5Vt<`sRIS;Uq;{Kxdr|F(yIqVJp>sj!k(>&;eK4Ltqr%l(D17W~Vore<{O(_w$e zS$%8$4YQHXEhf`xsbqlf4M{*O?k`o^HpH35ZSNa85mF&%a?PJhExe(7Z8#VA*Eblt zzDVg%7;vfI_kGNkdALJ>#P;>ODp%uey0(<)EIPnERhyr+d93IuhIf|MU;TZbrD5=u zG$sq%B$GVCF5c|taO=y+<1Evj%i|!Ja$EArOYmy1;@AGB4RP9E_SWuzTs4t^l2H&p zRPv6Lx}gr|1gcUU)$ep4%=WBAT*5>Iwx$7PRCqf}6t!MxqSGL+^< zm$IV!`Gibe#n}W)$`vK!no~(ltoeERr<1YT@R9y%R-|*MaDU{}UomH=Dy*-K=PcsM zVq=cJK>WPDMk8eDV4$aH;NuWOnNjHwchxBOIZ!*K_E#lr+{D&%AkbZx!wT(k{~%Y<-iFDb~QElK##13rY3R_j0i3{&nddUnNI|6scz4;3X`AmTFg z&`weZ!XJZ?cTiez=U%^<74-uU$rMc)g6;BwR)r1TLct%a&0)|6N8|VwO?% 
q-?(3}j3NvyqdZHNQGRaAs6hW^RPZ+s{J&*X*tcB7-^(a6@&5rrt~3Y$ literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/filtered.hsps.txt b/tests/test_data/outputs/extract/G7/filtered.hsps.txt new file mode 100755 index 0000000..8e28468 --- /dev/null +++ b/tests/test_data/outputs/extract/G7/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 22860 22759 102 0 100.0 100 100 minus 1.84e-51 189 locus_1 22759 22860 True True True False False False True False False False False +9 0 762 22872 2 761 22747 21986 760 0 100.0 99 99 minus 0.0 1404 locus_10 21986 22747 False True False False False False True False True True False +10 0 858 22872 0 857 21974 21117 858 19 97.786 100 100 minus 0.0 1480 locus_11 21117 21974 True True True False False False True False False False False 
+11 0 972 22872 0 971 21105 20134 972 0 100.0 100 100 minus 0.0 1796 locus_12 20134 21105 True True True False False False True False False False False +12 0 1098 22872 0 1097 20122 19025 1098 0 100.0 100 100 minus 0.0 2028 locus_13 19025 20122 True True True False False False True False False False False +13 0 1281 22872 0 1280 19013 17733 1281 11 99.141 100 100 minus 0.0 2305 locus_14 17733 19013 True True True False False False True False False False False +14 0 1434 22872 0 1433 17721 16288 1434 0 100.0 100 100 minus 0.0 2649 locus_15 16288 17721 True True True False False False True False False False False +15 0 1464 22872 0 1463 16276 14813 1464 15 98.975 100 100 minus 0.0 2621 locus_16 14813 16276 True True True False False False True False False False False +16 0 1836 22872 0 1835 14801 12966 1836 0 100.0 100 100 minus 0.0 3391 locus_17 12966 14801 True True True False False False True False False False False +17 0 1914 22872 0 1913 12954 11041 1914 0 100.0 100 100 minus 0.0 3535 locus_18 11041 12954 True True True False False False True False False False False +18 0 2037 22872 0 2036 11029 8993 2037 16 99.215 100 100 minus 0.0 3674 locus_19 8993 11029 True True True False False False True False False False False +1 0 285 22872 0 284 8981 8697 285 17 94.035 100 100 minus 2.3600000000000003e-124 433 locus_2 8697 8981 True True True False False False True False False False False +19 0 4935 22872 0 4934 8685 3751 4935 0 100.0 100 100 minus 0.0 9114 locus_20 3751 8685 True True True False False False True False False False False +2 0 327 22872 0 326 3739 3413 327 0 100.0 100 100 minus 5.47e-176 604 locus_3 3413 3739 True True True False False False True False False False False +3 0 417 22872 0 416 3401 2985 417 11 97.362 100 100 minus 0.0 710 locus_4 2985 3401 True True True False False False True False False False False +4 0 444 22872 0 443 2973 2530 444 15 96.622 100 100 minus 0.0 737 locus_5 2530 2973 True True True False False False True False False False 
False +5 0 543 22872 0 542 2518 1976 543 0 100.0 100 100 minus 0.0 1003 locus_6 1976 2518 True True True False False False True False False False False +6 0 606 22872 0 605 1964 1359 606 15 97.525 100 100 minus 0.0 1037 locus_7 1359 1964 True True True False False False True False False False False +7 0 642 22872 0 641 1347 706 642 0 100.0 100 100 minus 0.0 1186 locus_8 706 1347 True True True False False False True False False False False +8 0 684 22872 0 683 694 11 684 0 100.0 100 100 minus 0.0 1264 locus_9 11 694 True True True False False False True False False False False diff --git a/tests/test_data/outputs/extract/G7/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G7/processed.extracted.seqs.fasta new file mode 100755 index 0000000..9ef3116 --- /dev/null +++ b/tests/test_data/outputs/extract/G7/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G7/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G7/raw.extracted.seqs.fasta new file mode 100755 index 0000000..9ef3116 --- /dev/null +++ b/tests/test_data/outputs/extract/G7/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G7/seq_data.txt b/tests/test_data/outputs/extract/G7/seq_data.txt new file mode 100755 index 0000000..2e85820 --- /dev/null +++ b/tests/test_data/outputs/extract/G7/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 22759 22861 22860 22759 100.0 100 189 True False True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 21986 22748 22747 21986 100.0 99 1404 True False False False False False True True False gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ggg taa True False False +2 0 locus_11 10 858 21117 21975 21974 21117 97.786 100 1480 True False True False False False False False False 
gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 20134 21106 21105 20134 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 
locus_13 12 1098 19025 20123 20122 19025 100.0 100 2028 True False True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 17733 19014 19013 17733 99.141 100 2305 True False True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 16288 17722 17721 16288 100.0 100 2649 True False True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 14813 16277 16276 14813 98.975 100 2621 True False True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 12966 14802 14801 12966 100.0 100 3391 True False True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 11041 12955 12954 11041 100.0 100 3535 True False True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 8993 11030 11029 8993 99.215 100 3674 
True False True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaatta
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 8697 8982 8981 8697 94.035 100 433 True False True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 3751 8686 8685 3751 100.0 100 9114 True False True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggca
tgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctg
cccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 3413 3740 3739 3413 100.0 100 604 True False True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 2985 3402 3401 2985 97.362 100 710 True False True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 2530 2974 2973 2530 96.622 100 737 True False True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 1976 2519 2518 1976 100.0 100 1003 True False True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 1359 1965 1964 1359 97.525 100 1037 True False True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 706 1348 1347 706 100.0 100 1186 True False True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 11 695 694 11 100.0 100 1264 True False True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G8/blast/hsps.txt b/tests/test_data/outputs/extract/G8/blast/hsps.txt new file mode 100755 index 0000000..9fdef58 --- /dev/null +++ b/tests/test_data/outputs/extract/G8/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22872 1 102 22861 22760 102 0 100.000 100 100 minus 1.84e-51 189 +1 0 285 22872 1 285 8982 8698 285 0 100.000 100 100 minus 1.05e-152 527 +2 0 327 22872 1 327 3740 3414 327 0 100.000 100 100 minus 5.47e-176 604 +3 0 417 22872 1 417 3402 2986 417 0 100.000 100 100 minus 
0.0 771 +4 0 444 22872 1 444 2974 2531 444 0 100.000 100 100 minus 0.0 821 +5 0 543 22872 1 543 2519 1977 543 0 100.000 100 100 minus 0.0 1003 +6 0 606 22872 1 606 1965 1360 606 0 100.000 100 100 minus 0.0 1120 +7 0 642 22872 1 642 1348 707 642 0 100.000 100 100 minus 0.0 1186 +8 0 684 22872 1 684 695 12 684 0 100.000 100 100 minus 0.0 1264 +9 0 762 22872 4 762 22745 21987 759 0 100.000 99 99 minus 0.0 1402 +10 0 858 22872 1 858 21975 21118 858 0 100.000 100 100 minus 0.0 1585 +11 0 972 22872 1 972 21106 20135 972 0 100.000 100 100 minus 0.0 1796 +12 0 1098 22872 1 1098 20123 19026 1098 0 100.000 100 100 minus 0.0 2028 +13 0 1281 22872 1 1281 19014 17734 1281 0 100.000 100 100 minus 0.0 2366 +14 0 1434 22872 1 1434 17722 16289 1434 0 100.000 100 100 minus 0.0 2649 +15 0 1464 22872 1 1464 16277 14814 1464 0 100.000 100 100 minus 0.0 2704 +16 0 1836 22872 1 1836 14802 12967 1836 0 100.000 100 100 minus 0.0 3391 +17 0 1914 22872 1 1914 12955 11042 1914 0 100.000 100 100 minus 0.0 3535 +18 0 2037 22872 1 2037 11030 8994 2037 0 100.000 100 100 minus 0.0 3762 +19 0 4935 22872 1 4935 8686 3752 4935 0 100.000 100 100 minus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta new file mode 100755 index 0000000..6226bc8 --- /dev/null +++ b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnctagaatgaaccggaatgcggctctaaactcatcccataatcgctgattaatcgccacgttaccgtaccatgaggggcggcagtcgaactcaatttaaatgacgctgatgattttggcggaacgaaggtcgctgatttcacttcatggctgttaagcgtcacactggcaaagttcatataatagggggtagggttatttacggtaataacatcccctgccgtctgccattttaattgttggctctggctatcaggcgttgatttggttaatgctggcggtcgataaataagctttatttgggtattaatggagatttccacgcggttcgcggaggcattatcatcaatagaaggaatacccttaatattgagccagtacatagactcccgatctgcaggtagaggggcgccagagcgaataacccgaatactgttcttttgcccggcatcaaggcgaaaaagaggcggggtgataataaacacctgcttatttgtgacctggggatcggcaaccgataaccatgactgaacaagattcgctttgctgtctttattctctacattgattgacgactcatcattattgccgtcgaaaaccagtcgggttccgccaacaacaatgctggcatgggcaacatggcttgtcagtaatacgaccaacacggatttctttatgtgtttcatnnnnnnnnnnntcatgccgcttgctcctgcacccacttcgtcaccgtggcgcgaatgtcgcgcatgacggcatccagcggctgggtcgcgtcaatggtgcggatacgcgagtcttgcgccgccagttccagatagcgcgcgcgagtccggttaaagaaatcaaaagattcctgctcaatgcgatccaaatcgccgcgcgctctggcgcgttttagcccgacttccggcgtgacatccagatacagcgtcaggtcaggacgaaaatcgcccagcacggcatcgcgtagtgtcgccagcatggtttgatcaatgccgcgccctcccccctgatacgcctgggtcgagagatcgtggcgatcgccgatcacccatacgccttgcgccagcgcgggtttgattaccgtttcgacgagctgtacgcgagcggcataaaacatcagcacttccgctttatcggtaatcacttcgtcgcctaccgatcggatatccagcaccagacttcttagtttttcggcaagctgcgtaccgcccggctcacgggtaaaaatcatgttacgaatacccagttgctcaagggtctccaccaccacgtcgcgcgcagtggtttttccggcgccttccaggccctcgatgacgatataattactgcccatnnnnnnnnnnnctagccgggaacaaattcaccgtctaaaaggagttttccgttttccgcatacataacgagcgcattaagatactccggatcaaacttcacgccgcgcttattttttacaatatacgtcaaacaatgattatgggttaaaatgactatatttttaatctgcgaatttttcagtaacgtattgattgaagaataaataccgctgccgcaatccatcatttttttaaccgccctacgcgacctgcctgccgaaaaccaggttgccgactggatggcgcgcaccgtattactggaatagagattgtaattttgtatatcggcactgaaggctttacccagcgctctggcatcttgcgcaccgttgaccgtaatccccgtgctgtctggcaggcaggtattatcggaacgatcgcaccgctcggcatggcggaataacacgactaccggatgctgcttcgccagcgccgccagcgccttaccgttaatctgcggtagaccgttaccgctacaggcatgttgtgagtccaacccagcgataataaccaacgcccccgccagaatcgcgaaatagcgtttgtttttaataaagcgtagggtaaatgccagcacnnnnnnnnnnnttaaaccgcttgttcaaggctga
aactgtgacagtgaacctgcggtttcagcctgtcgttcataattttaatcgcatcgccaagctgcatggttcggcctgctatcaccacgcctggctgtgccagcagacacaacgcggcattttctcccggctcaacgaccagcaaattaacctgttccgccgtatcgcttaaccagacgcttgcggcatcacccgtgcccggggtccacatttcgccatgcgccacaaaatgccagcttttcggcatctgcggtttgagatagcgaatcgccaccagcgcattcaataccagctccgcgcgttgctctttggttaattcgaaatcccggcatttttcttcaaaggaaaaatagagcgcggcatcatccacacaaaaaccggtcgggcaaaacgcgtccggcgtaagcattttacgagagaagcgcgagcgaaaaagcataccattggcgagatcgagcatcatacgatcgtgctcttcatcataataccagcgccagttatcgtcaggtttaattcgcatnnnnnnnnnnnttataaggcttgcagtctttcatgggcagcaagtaacgtctgatatatgcttaaattcttacttccgggttcaagtagaacttttttaaattcggtcatgtgttgctcttcaacttctttcgaacgcatgtattgtggaagttcctggaagaaggtaagcgcctgttctttggtttgcttatatttttcgcaaaaaatgcgtgagctgattgcgctattttttgatgcggtattatcagcgatctggtttgataatgatttattcttcgcaaggtctgaaggcacatacggaagtgactgacactcatcaataccatttgcgttggccagttgctctttctgagcgctaggttgctgtaccggtttgctcacggaggaacggagaggcacctgggcacagccgatcagtaaaaagacaggatgacagctatcaaattttttcatnnnnnnnnnnnttagtgcgcttttacccgcctgaaccagtaattttccatcttcgttatccatttccactttttatttttcggtattacgccagccctaagtaattgcagctgtcggttgtaggttccaatagcgtgtccaccatacgccccttcaaatgctatcaccatgggtccctgcccttttaccccctcttgtcgttcatggcaataaagcgcgttgcggtgagtcacgtcaagttcatactctacgttcagtttcacacatttttccccgcaggcttctactggcggcaatattgtcacggtataaggcgggttatctgcaggggttttccatgttccgatcaggtctttgcaggcgtttatctccgtagccccggaggaggcagaaaatagcgcgagcaaataaaaaggtattagtttcagnnnnnnnnnnnttagtcccaccaaacgtcgaaaagttcgctggttcggacttcttcaaggttgcgcgcttccagccacttacgcacaatcgcctgatgttcttcggtacatttaccgatttcctgcatacaaatcagcccttcccaggccaggtagccgctgccgtcaaacgccagtttattaggctcaataacgtcattaataaagtcatcgacagtcttatcgatctgctcttcagatgtaccttccggaaaacgccatgccaccgaaaatcctaattcctggaattcgtcaatgtgcatttttttacgcagacgacgactacggttctttgccatnnnnnnnnnnnttaaggcgtcacaatcagcagtccctcgctcgcgccggttgcccgccactgaggcgcgtacatcgattccacctgcggttgcgcaagctggtacgtccccggcgttaccgcgcgcgccaggtagaccagcgtcacgggctggccctcattgacaacgacggcagccacaaaccgatcgtcgcggaattccatatactgaatatccgcctgctgcatctgattaagcagattttgcacttcgctaccgc
tctccggcaggctggcgctgctgtcagccagattctggttttccagctccagcccggccgggagcaggtccaccaccagcgcatccggcacattgcgatcggccactaccgttaaccagaccagcaccagttcgccgctacgcaacgaggacagcgatttgcgctgaccatcggtccccagtatttgtcgttcaatctgcaaaacgttgctggcaggctcaggcgcagatgagggatagccgctgctatccagacgcagccatagcggctggctaccggtgttcgtcacctcaagggcggccagctgatcagcatccagattacgggtcagcgccttgtcgcccgacagcggctgcgcctctaacgaggtctgcgcctgccaggcgcccgcgctggcctgtcgcgaatgcgcggcgaggaacaaggcattgttctcctgggtagagagccagcgctgaccgaaggcctgctcagaaagcgagcttaatagcgcgttttgcgcgtccggtctgaggttgttctcttccagtaacgacaacatcagagcgttatcgcgcagagaactgccgtaatccgctatccattgccgttcgtcctgacgcggcgtattcagagccagcgtaatggcctcttcgccgcgtctggcatcacccatcgtgtttaacgcgatgcccaattgcatcagcggcagtcctgaagccgcctgactacggcgctcccagatttcgcgcagcgcgccgagcggcgctttctgctgacgcgccagtaccagcgcggcgtaagcctgagcggcaaaagtactggcctgggtattatcgctataacgaatcagcatcgtaccgggatcctgcagatagcgcagcagtcgctcattgccccggttaatggcctccggcgggacgctatatccctgctcgcccgcgcgaatgaggaaatccatcgcgtaggccgttagccagggctcttccgccccattttcatcccatagcgcaaaaccgccgttatcacgctgcatctgtagtatgcgggagatgccgatatccaccgcggcgcgccgtttttcatcgctatcgccggtaataccgagcgactgcaattgagcggcattggtatacagcgccgggaataacccgctggtggtttgttccaggcacccgtacggatatgctttcagctcgcgaatgtagcgcgccagattgagcggcggttttccgcttaacagcagttgtccctgtaacgtggctggcgagacgtttgccagatgctgctctggtacatgccagctctctcccggcgccagcgcaatgccgctatttaccgtttgggcaggccaggccggacgcacgccgatttgccactgcttatgctgcgcgccgagggtttctcccggcagattcagaccgctaatggtcgcctggatttcgccttcgccaaaaccttccagcgcgcgtaccggaacgaataaggtggtgcgcacgcccggcgccaggttgaccggttgcggctgttgactaagcagttccagtaacccactggcggcgagcgcaatattcagcgtctgcgggcggtcggtcagattggtgacgtccagcaccagtcgcgaaacatcccctcccgccagaaaacgcggcatattcagctcggcaatcactggcgcggcgacaacgactttgctttcgccgcgaccaaaatcgtccgctgtccatgcctgcgccataacccgcagttcgccgttaaagtcgccaatcggcagcgttacgaccccttcgccctgctcattgagcgtgatcggctgcgcctgctgcgcgatgatattggcatggtttaccggcggttttccgccgcgcgtaaggtcgtcgccatcgccgccaaaacgcaacgccgccagccgcccctgcccttcaatgacctggccgtaaatatcgtagatatccgcaccgtagcgtttttgaccgaagaacgcctgccacgggtccggcgtcgcgtaatcggtgatattc
aatacaccgctatcgaccgcggagaccagcacgttgatctgttttggcatttcgccgtgtttaacgctggctttcaccctgacggtgagcggctgattcgggcgcattttagccgggctttccagcgccagatcgaggcggcggttgtcatcccccagcggtagatgtagtaaccccacggcgcgttttggcgtcgcggaacgagatttatcgccgggacgcaccaccagcgtactgagatagagatcgtggcgattccaggttttatccaccggaatcgtgagctccagcccctgcgccggcacgtcgatcgcctgccaccacagcggaccatcgctggactccaccatggcataacctttaccggcgaccggcgcggcgatatgcaatttcatggtgtcgcctggacgataattcgctttatccagtttgagggtgacgcgatccggacgcgccgcgccgctaccgtcgctgttatcctgccagctatagccggcccagaaacgaacgctgctgaccgtctcattcggcgctttgacctccagacggtacgcgccccattccaccgggaagctgacttttccggtttcatccgcgttcagatccagcgtctgctcgccctccaccagatctttttgatcaaactgcgactgccagccttcgctttccgaccagttccagtaatagtcgcgacgctcgcggatgagccgcacctgtaaaccggacaccgcttttttctcgccctgcgcgttggcgtaaacaatatcgaatgcggcgttgctgtcttcgtcgacaatcggttgattaacggtggtatccgtacggtagtcgtataccgctttggcggcgaactgtggacgaattcccggtaacgtatcggcaggccaaatcgcctgctctacgcgacgagtgaccggacgaccgccagactccagcaggctggcctgtagaattacctgcaacggcgaatgcgcttcttgccactggctggcagcactcacttcaccacgtccgcctttatccaacgtcagttgaacttcgtccaggctgcgcgaaagattctcttcggcaatattgccgaactggaagccaggcaacgccgcgacagcgtcgcgcagcggacgcaggaaaagttgcccttgcagggtattgccgttagcaggggcgccatacaagtaatagccgacaacggagaatttcacctcatccgcaggcgccagcggtgttttttgcgccgtcaggttgagcgccatccgctccggcataaagtcttccacgtggaaatcccaactccgcagcaaattatcgccggtgttggcgcggacatgccacaagccggtcggcgcgttgatatccagcggataattcaaacggtatagtccgttttccggctggctgacgacggtacgcatcacttgtccgtctggttttaccacttccagcttaacgggttgatcgggcagcgttttaccgtcgctatcgcgcagtaatccgttgaggataaccgtttcgcccggtcggtagagatcgcgtgggccgaacataaagaactgcttgctgtagccgggcgcgccagcgacattaaactccgacagatccagagccggaagcgtgagatcgagcagcgtggtctgcccctctttacgcgccagtaatagcgccgccgctttatcagcctccagttgcacatgtccctgcgcgtcgctcgtcgcctgcgccagcgtctgccctttatcattcagaagaacgatctcaattcccgactgcgccgcgccgttttccaggctttgcgtaaagatatccagccgactatggtaacggtgcgcggacacgccgatatcgctaagggtaaacaacgtagcggcattactatagttgtagtgtccagcctgattcattaccgccacatatacgcccgcctgttgcagcggcttaatatcgcttaatggcagcagcagtttctcacgcgtattacgcgccggattaagatcaaaacgacc
ggtataaaccagatccgccattttcagcagattgtcggattcccagttagagagggaactacggtactcccactggctgacaaacgacgccagcgatccgggcttaacgcggaaaaagttcacatcaacgtggttgacgttaagcgccatgaccggcagtccttccgctattttccccggtagcagcgatccccggctggcaaagccgacgctgggctggacatcacgcgtggtaatcgttttttcataagacttgccgaaggtggcgttattcagcgctttaacggcgggatcaaccgtgaccaccagcacgcgctcaggttccagatgacgtaaccttagctcttttaaatttggcgccagctcccatgcgccgtcaacgctgccgcttttcttatcaaccacgtgaaccacacgggagaaatcctgttcaggatctaaaggaattgaaaacgtcagcaccagcgtcgccgcgccgtcgagctgcgcttcggaggcgtctaatagcgtgagcgctttgccctggctttgctgcgccagcttttgtagctgtgacgggtcttgcgcgggcgagggctgcgctacggctggcgcttcgcttttagtcgtcggggcggttttatcgttgttatcgcatcccgccagcgccagcatgatcatgcaggccaccacgcgtaaatgtttcatnnnnnnnnnnnttagccctgatgcggcaacaattccggttggacttgtaccggtggcttgttgctggtcagcgcggcttcatcagcctgaatggtgccggaatttgcggcacatacgccttcatgcgtgtggatgatgtgctgatgctgcgcattcacatcctcgcccattgccgcgatatgcgtgctttccgtaccgccactgttagtcgcccaggggatcacggtatcgctggcaaatcccatgccggaagtcaaggtcgcagttagcgctgccgttgtcagaaaaagtttcatnnnnnnnnnnnttagatattccgtaaagaagcaaaaagtaaagcccactcgctcttcgcgcgatagaagaccggcggcttgccaatcggcgcatccacggaaatctcaccgccgtggtgagcttcgccggtccagatattcacccagtgatcttccggcaggtacagcgtccaatcgcaacgcccctgctcgtgaaccggcgcgaccagcagatcctgaccgagcagatattgatatttcagggtgtaggtagcggcatcgttctcgtagtgcaggaatagcggacgcatgaccggcagaccggtagccgcgttttgcgccaccgcctgcttgagatacggtttcagcgtggtaaagacggtggtcatgcgggcaaagtgtgcaatagtttccgcgtcgccgtcgaactgccagtcattgctggggcggttgccttcatgggtgcgcatcatcggcgtaaaggcgctgaaatcgcaccagcgcagcagcaactctttgctgcgcttcatgtcaaacagggtggtgtagccgccgatatcgctgtgatgcagaccatggccggtcatcgccagcgacaatgcagcaggcacgacagaggccagaccatcatcaagactccagtcaacgttctggtcgcctgcccacatcatggtggaatatttctgactgccggtgtaacccgcccgcataaagaacaggatctcgccgagcttgccggtcttctgtagcgcttcgtagttacacttcgcccacagtgcgggccaggcgttatgcatgagctctgcgctgaccccgttgtgcagatacgtgtcggtcggcagatattcgccgaaatccgccatccagccgctgcagccgagcgcgatcatgttctttttgatgacatctttgaaccagtcgtaagcttcaggattagtcagatcgaccacgccgccatagaattcgccaaactcgaccagatagtcgccgcccgtggcgtctttcgccagatagccgtgtctcgccgcctcagcgcag
aggtctttatcactggcgacgtatgggttgatataagagaggaactggacgccttcttctttccactgtttgatccggctatccagctgtggatagttgtcgctattccacttccagttccacatcacgcgcttgccaaaggaggtcatgcggataccggaccagtcctgcgcccaaataccgtacacttttacgcctgcgttgcgcatgttatccagtttttgctgacaaacttccgtaccgccctgtatgccgagcgtgacgccgtcgtaaacccagtccggcagctccggctggcgacctaacagcgcagtcagtttttccagcagggcgatgtaggtgtcggcacactcaaaacgcagcgtagttttatcttcccacagcgccagttcgtgatactccggcgcgctgaagtcgaaattcatatagcagctattatcgacgtggcagtaatacttctgcgtgctgacaaaggtcggttgcgggaagaaggtcaagtaatagtcgccgccggcgttctctttacagtctgcctgccaggtgacatagctggttttattacgaccaacgccctgttcgctggtccacagcgggaacggcttgccgcgcaaatcgaaataagagaactgttcgccgcagccgtagatatggtcgtctggattagctgcgaggcgtaaccagatacggttatggtgcaggtcgtcgttttgcagatccaacgtcaggcgtcccgcctcatcggcggagatgcgaagggtggcgctaattgttgcgccacggctgaattgtaccagccagccgtcgggtagctcgctgactgtggcctccgttaatgcaatcttctcgttaagtttgtctttgatgctgaagttgccgcgaaacatgtcgatgtcggcaacgcccgcgccaatccacagacaggggttttcggcgctgtggcgtaaaatcaggcgctgttgccagctaagcgcaaaaccatcctgtgatgttgtcagttcaaaatcggttgaccgttgtggtagagaattcatnnnnnnnnnnnctactcatcttcaagataagtataaccgtacagtcccgcttcaaattcctcaaggaactgctgctgcaacgcatcgtccagatccgtctgttttacctggtcgcggaaatgcgttaatagcgttttcggatccagttgcacatattgcagcatatccgcaacggtatcgccttcgtccgacaactcaacctcgacactaccatccgggaagacaaacacgtcaaccgcttcagtatcgccaaacaggttgtgcatgttaccgaggatctcctgataggcgccgaccataaagaagccgagcatcggcggattctctggatcgtattccggcatcggcatcgtcgtggcgataccgtcgccatcgatatagtggtcgatagcgccatcggaatcacaggtaatatccagcagcacggcacgacgttccggtacctgatctaacccttccagcggcagcaccggaaagagctgatcgattccccacgcgtccggcatcgactggaacagcgagaagttgacgtacattttgtccgccatccgctcttgcagttcgtcgataatcgggcgatgcgcacggttttgcgggtccagttgcttctgcacttcatggcacatgctgagataaagttgctccgcccaggcgcgctcctgcaaactaaacgcgccggaagagtagccgatatgaatatcgtgcagatccatttggctatcatgcagccattcacgcagcgagcggcgggtgccaggcttatgcatctcctgccaggtttcccacagattttgcagcgcgcgcggcgcatcttcagcaggggcggtcggatccgtgtattcgttacgctccacgccgataatgttagagaccagtaccgtatggtgcgcagtgacggcgcgcccagactcggtaatcaccgtcggatgcggtaaaccatgctcttcgcaggcatcgccaatcgcccag
atgatgttattggcatattcgttcaggccatagttcaccgaacagtcggactgcgagcgggtaccttcataatccacgcccagaccgccgcccacgtcgaagcactggatattaacgcccagcttatgcagctcaacatagaaacgcgcggactcgcgcacgccggtcgcgatatcgcgaatgttcgccatctgcgatcccaggtggaagtgcaacagttgcagactgtccagacgcccagcgtcacgcagggtctccaccagttgcagcacctgcgtcgccgccaggccgaattttgatttttcgccgccggaggattgccacttaccggacccctgagaggccagacgcgcacgcacgcccaggcgaggaaccacgttcaggcgctcggcctcttccagcacaatcgcgatttcagacatcttttcgatgaccagataaaccttatggcccatcttctcgccaatcagcgccagccgaatatattcacggtctttataaccattacagacgatcacgctacgggtcatgccggcatgcgccagcaccgccatcaattccgctttcgaccccgcttccagccccaacggttcaccggaatggataagggactcgatcacgcggcgatgctgattgaccttaatcggataaacgaggaagtagtcgccgttataaccgtaagattcacgcgcacgcttaaacgccgcgttaattgaacgcaaacggtgttgcaggatctgcgggaagcagaacagcgccggcagacgctgaccttgcgcttcgcgcgctttcaccagtttggcaagatcgacacgcgcttccggtacgtcgggatcggggcatacgctaatatggcccagctcgttgacgtcgtagtagttattgccccaccaggcaatattgtaagtgcgcagcatcttgctggcttcctgggagctcattgcaacctcctgcatnnnnnnnnnnnttatccgatacgactgacttcatcaaataaggtggctaacccgctgcgccgttccgttcgcgtcacaatcgcgcctgccaggatccgttcatcggcatacagcgataaccgccgccgcgcccgcgtaacagcggtatacaccagctcccgcgtcacgaccggcgaacgttggctgggtaaaatcagcgcggcgtgatcaaattcagacccctgtgatttatgtaccgtcatcgcccaggttgtatcatgttccggcagacggctgggctgaacggacttgatcgtgccgtccggcatcacaaaccagacgcgtaacccctgcccgcgatcgagcgcaataccaatatcgccgttaaatagccccaacgcgctatcgttgcgcgcaatcattaccggacgcccttcataccagcgagagtgcggatgccgctgaatttttcgttgctgcaccatcgcctgctcaatgcggtcattcagtcccctcacgccaaatgggccttcgcgcagcgcacaaagcagttgatactcattgaaagcctgaaggattgcctccggcgccgctttttcatgcagcaaccgcaggtagcgcccatagcccgccagcgcttcatccagcatcccggcataatcgtcgctgctttgcaatgtacgcttctctatatcgctaaacccctgctgaaaaacagcctggattgccgacctgtcgccacagttaattgccgccgccagcttgccgatgccagaatcgctgccgaaacggtagctcttttgcaacaaacagaggctatcgcgtaaagacgcggcttgcgttccggcccccgccggaatggcgctaccagtgagtcgacttagctgtcgggcgcgttccgccgtaaaccctgcgttgacataggcgcaaatatcgcccaacacagcgcccgcctcaacggatgccaactgatcgcgatcgccaagaaaaatgacccgcccgtgcggcggcagagcgtcaatcaaacgtgacatcatcggcaaatcaatcattgatgcctcatcg
accaccagcacgtccagatgcagcgggttgcccgcatgatggcgtaatcgctggctgccgggctgtgcgcccagcagtcggtgcagcgtactggcgtcctccggtatacgctttttctgcgcatcggtaagaggaagctgacgcaacgccgcgccgagcgactccgtcaggcgtgcggccgctttcccggttggcgccgccagccggatacggcaacgttcgccatccgccatttgaattaatgccgccagcagcttcgcgacggtggtggttttaccggtgccgggaccgcctgaaatcacggagatacggcgagttagcgctacggcggcggccaccttttgccagttcacctcgtctgtcggagggaatagcgcgtccagaatacgggataactgatcttcatctacggcgatggcctggttaacctcgttaaaaaagcgcgcaaccgtacgctcgttgcaccacatgcgattcaggtagaggcgatcgccgcacagaattaacggcgcggggctatcgccgcagctaaccgccgcagacgccagtaaccgctttttccagtcgattggcgtagccgtttcgcttatccaggcgaccagtaagggatgcgcctcctccgttaacgttaaacgcgacaacggcagacacacgtgaccttcacctgcgtcatgactaagcagcgctgccgccagcgtcacggcgggatcgtcgttaccggcgacggttaaagcaaactgggcatcaatgggccgtaagagtttttgttcaacggcctccagcaaccgcttctggattgtcatnnnnnnnnnnnttattcctctttctgtgtgggatgctgtcggccagaaacgacctccatacgggcgccaccgagcagactgtcgctggcaatgatctgcccggcgtattgttccgtaatctcgcgcgcgacagccagccccacgccttgtcctggtcgtagggtatcggcgcgctgaccgcgatcaaacaccagggaacgtttgctgtgggctatgcctgggccgtcatcttcgacgaaaatatgcaaatgatcgtcggtctggcgagccgaaatctcgacaaactccagacaatatttacaagcgttgtccagtacgttgcccattacttcgacaaagtcgttttgctcgccgacaaaactgatctctggtgaaatatccatactgatattcacccctttacgcagataaactttattgagcgcggagatcaggttatctaacaacggcgcgacgggatgcagttcgcggcttaacaacacgccgctaccgcgcatactggcgcgatgcagataatagccgatctgctgggaaatccgactgatctgttccagcatcaccggttcagctttgctgacgctcatcttttcgttgcgtaaagagcgtaacgtactctgcaaaaccgcgagcggcgtttttaaactgtgcgtcaggtcggttaggctcgtgcggtatttgttataacgttcgtgctcgcttttgagcagttgattaaggttgcgcacaaggctgatcagcttacgcgtcgtctccggattgagcatttcgcggtgatgatcttcaagttcgcggacttcccgcgacagcgcatcgatagggcgtaagctccaccaggcggcgatccacagtaaaggaatgactaacagtaaattggcggccagcacgtatacgaaccagctccacaccatataggagccttttagctctatcggaatggtatcgaccaccacgatggttaactgcggcatccgcgtcgtggcaggataaatatttaccgctaccgagtgggtcatctccgcatcatcgtcatcttcacgtacttctttgagtttttcctgcgcggaatggtcctcgctcaacagcgtgctggtggcgtctacgttggtttcaatttcatggaagccgttcgtttttaacccttccggttgagtgcttttaatcagccaggggatgttgcgctgcgtccataataatttg
cccgtttcatcgtaaatcaccgtcatggtcgggctttgcatgtccagattttcaggcagctcaacgcagattttattattttcccatttggcgagggtataaaacaggttgctttcgccgcgcagcagacgaaacgtggttttatcaaaacttacgctatagccgaccagcgccactatgccatatgccagagaaagcacgagcacgacgccggctgtcgccagcaaaaaacgaacccgcagcgacagcggcagaaaatggcgagcaaatttattcatnnnnnnnnnnntcatttttctgtgatttgttctgcaagtcgggcaatacgccttgccattccccggaaaataaacaggtgcgccgggatcatcagtagccagtaaatcaggcccggcattccgtgtggatgccaccaggcgcgcacgtcaatttcgcggtagcggcctttatcgtgcagcgtgaagctaagccgccccagacccggcgctttcatgccaaacaagagcgtgagctgtttttctggttcgacaatgatcactttccagctatctaccgtatcgccaggcttgagcaaggtatgcgacgggcggcctttcgccagtttatgccccaccagacggtccatcgcggcgcgcgtctgccacaaaatattgccgaaaaaatagccctctttgccacccagccgatttacgacctgccatagcgccgataggctggccggggtctgcgcggtaaagcccgcctgctttggaaaatagccgtattcgggacgccagcgggcgaaggccagcgcgtcgtagccccagtcgctggagttcaccagtttttcttcttctttcagcgtgcggcgaacggcgtcatcaaaggtgataagcgtttgggggatcaacttttttaacgcggcgtcatcggccagcaaatcgtgccttaatccctggattaacgcttttgcggtagttggcggcacggaggtaatgacgtttaaaaaccagaccgaaatccagcgggtcggaaaaggcaccgggatcagcggacgccgtttaccgctgacggccataaaacgttcaaactgctgctgataacttaatacctgcggcccggcggcttccagaatacgatgctcgtgcgcagggtgctccagtaagccgaccaggtagtagagtaaattttccagggcgatgggcgtggtgcgcgaacgcacccagcgcggcggcgtgagtattggcaggttgtaaaccatgtcgcgcatgacctcaaaggcggcggagcctgcgccgacgatgatcccggcgcgtaattccgtcaccggtacgcctgcgtcgcgcagcgtgtcagccgtaagctggcgggcgcgcaggtgatcggattgctcatgcgccggcgcctgcaatgaactgaggaaaataagttgtttaaccggcgtctggcgcagcgcgtcgcgcacgttgagcgccgcctgacgctcatgggcgataaagtcgccgccttcgcccatgccgtgtaccagatagtaaacggtatcaatgtcgcgaagcagcgcgggtaaattttccggccagtgcagatcgaccttatgacaactgacgttggcgaggcgatgtttttccagacgttccacgcgccgcgccgccgcccgcacctgatgtccttgctgacttagcgcaaagaccaggtgctgaccgatatagccgctggcgccgaggaccagaatgcgttgcgccacnnnnnnnnnnnctagatcacgtattcgatcaacgctggttcttgtttacagaagcgacgccagtcgacaatcggcattcgtacctgcggactgacgctaccgtcttcctccatccactctttttctattgcctgaagctgataaaaccggcttctcagacgcccttcctgcggcagcaaacgcagcgtatgctgcgccacctcgccggaaagacgctccgtcaaagcctgaaaaagctgtggtattcccacgccgctttgcgctgaaagccaaacgc
ggatgggtttattctcttcatctctgtcgatacgcggttcaaagtcgtccagcatatcgattttgttcatcaccattaaggtggggaattcgtgagcgtcaatctcttcaagaacggtgtttaccgcctcgatgttttcctgcacacgaacatccgccgcatcgaccacatgcagcagctgcgtcgcctgacgcgtctcctgcagggtagctttaaaggcagccaccagatcgtacggtaaatggcggataaagcctaccgtatccgccagaacggtctcaccgacatccgctacatcaatacgacgtaacgtggggtccagcgtcgcaaatagctgatctgccgcatagacccgcgcttcagtgatctgattaaaaagggtggattttccggcgttggtatagcccaccagcgataccgtcggaacgtcggccttgatgcgcgactgccgcccctgctcacgttgcttctcaactttctccaggcgcgactgaatctgcacaatgcgattacgcagtaaacgacggtcggcttcgagctgggtttcacccggaccgcgcgaaccaatcccgcctttccgacgttcaaggtgggtccagccacgcaccagacgcgtagccagatggcgtagctgcgccagctcaacctgcaacttaccttcatgggtacgcgcacgctgggcaaaaatatctaagataagaccggtgcgatcgataacccggcactcgcacaaacgctccaggtttcgctcctgggctggactcaatgcatgatcaaacaatacgaccgctgcgccagtcgctttcacggcttccgcaatttcaactgccttaccttcacctacaaagtacttcgggtgcggtgctttacggctaccggtaatcacctgcattgcttcgacaccggcgtaatagaccagagattcaaactcctggaggtcttccatatctttgtcttgcgaaaaatagatgtgtaccagtaccgcctgctcaccggcatcataacggtcaaacaannnnnnnnnnntcagccgctaaacacgttaccggcgcccggcgcgctttttaacacccagacgcgaccatagtgattataccatccggcacgatgcccggcatccgggccaatcccctggtaaatatcaaagtgctggcctttaatcgctccgccgacatccagtgcgaccatcaaacgtagctcatactgaccgctaaatttaccgttgttatccagcaacggtacttccgccaacaaggttgtgcccggcggaatgatgctgcggtcggaggcgacggatgctcgcccaatcagcggtacagcgctggcgcctttgaccggcgcaaaagattgcggtttaaagaagacgaacgacgggttctgctccagtaattcacgcacttccgcttcgctgtgcttctctccccattcgcgtatagcctgcatcgacatatcttcttttttcacttcaccgcgatcgataagcactttaccaatactgcgataaggccagccatttttaccggcataactaaagaagttcagcggactaccatcaccgaaatcaatataaccgctgccctggacatccataataaagttatccatcagcgaattactccaggccaggatgtacttatcgctcagcgcgcctgcgtagatctgggcgcgggacggtaagcgtccgcgttttggcggcatactatagatagggtactggaacgcgccctggcgcgtatggcgagcctgaacgacgggcgtatagtagcccgtgaactggacgttaccgtagttgtcggtgccttccatctgccaggcatcgataccaaactgacgcatagtgcgcgtatcgcctccggaacgtaaccagttctggacagcgttatagacgttgctttgattggtgtataaacgcggcgacgcggaacggatctggtcgacctgctcggcaaagtcaccagcattaatcggcgcgcccaccgcgtccggctggtttaccaggg
agaagggctgggtaaatttcccgtccttatattgctgaccgcgatcggtcggttttgatgaacaggcagccagcattgccagcattacgcctgtcgccacatattttgcccaacgtcctttcatnnnnnnnnnnntcattctgacacctccattttttgcgccattttggatgctctgtattcagggatggtggtcacaatcgcaccgactaacgcgaacagcgtaccgatgatggtgaccaggtagaccgtattgcctaatgaagggatcaattcatcaattagcactgagccaagcagttggcctgctgttgacgctacgcccagcatcaatagccctaagcctctcaccagaatcgccattagcccgatggatagcagacccagcggaccaccgagatacatccaccatgtatcgggtaactggatggtgacatggcctaatgcgatacgtatcgccagcgccgcgcccaggacacaaaagccgacgatgaagttccatgtaatggacaccagcatggagcccgttgcctcggcgactttcgcattccccgcaggctgccagccagcgagtaaccctgccaaaaaggggaggatagcgagcaggataaacgaggttgagtgccactgtggcgacacgacaaaaatggtggcgataacggcgaacaatgcgccagtaatgcgccatggcgtaaaatattttttctcctccacgccgatgccaaaacggtcgcacagcaggccggaaagaagcagagcggaaattaatgccgtttgaaaggtggcaacgcccagcgcgctggcggatgcgccttcagaaaatacgaccatcgccccgcataatcctgcaaaccaattccatagcgggatttttctctttttaatcagagtagggattgaggcgaattgctggcgtgtttctttgcgcgcaataataataaaaaacatgacgaccagaccgctggcaaacgagattactgcgcaagcattaccgtcttgtaaccaatgtcctaactgcccattaacggcagactgcatcggggaaagcataccggctaagatggtggcaagcatcagtaagggggttgagtacttattcttgttcatnnnnnnnnnnntcagttaaacggttgtaagtcgacacgcgccatcattgcggccaactgaggacgatcggtaatacccacattgctctggctgaccgccagcgcggcaaccgccgtcgccaggcgcagcgtatgttcggtggactcgcgcatcagcaggccgtaaatccatccgccaaccatggaaccgcctgcgccgacggtattttccacgtcaaccgccggtggtttagcgatccattctcctgaggcgttaacccgcagcgcgccttccgcccccagcgaaatcaccacatgagcgataccctgttcgcgtaacgcgggcgccgcatcaatcacatctttcatttccgggagcttacgacccgcccaaatttccagttcgcggcgattcggtttcaccagccacggcgcagctataagaccggcgactaactctacacggctagtatcaacgataatgcatggacactggctgcgcagacgcgtcatccagtcggtgaacgcttccggactcacgccagccggtaagctaccgcttacgcagaccatatcgaactgacccagccagctcaggaagtcgttaacaaagcgttcccagtctgcgggagtcacgtcaaagccgggaaagttgaagttggtcacttcgccatctttttccgtcagcttcacgttgatgcgggtccggccctgaaccacctgaaagcggttagcgatacccagttcgctgaataattgctgaaaaccgtcctggttatctttaccgagaaaaccgccgacagtgacgtcgatgcctaagtctttcagcacattggcaacgttaatgcctttgcccgccgcgtgcagacccggggttttcaccaggttcacnnnnnnnnnnnttaccattgcgtg
ccaactcccacgctgtctaaccagtctgaaaccacatcatgcgcgctgtgcgcggttaaatccacatgcaacggcgtgacggagacgtagccttcatccaccgccgcgaaatcggtatccggcccggcatcgtatttatcacccggcgggccaatccagtacaatgtattaccgcgtggatcttcctgcgggatcactttatccgctggatggcggctaccgcagcgagtcacgcggatgcctttaacctgcgctaacggtagatccgggacattcacgttgagaatacgcccggtacgcaacggctcccggcttaaccctcgcaaaagcgcgcaagtcacggctgcagccgtatcataatgctgatagccgttaagggagaccgctaatgccggaaagccgagatgacgaccttccatcgccgcggcgacagtaccggaatagatcacatcatcgcccagattcggacccgcgttaataccggaaacgacaatatccggacgcggacgcattaaggcattaacgcccagatagacgcaatcggtcggcgtccccatctgtacagcgatatcgccattatcaaaggtaaaagtacgaagcgaagattccagcgtgagggaattagacgcgccgctgcggttacgatccggggctacgacctgtacatcagcaaactcacgcagcgctttcgccagcgtttgtataccgggcgcgtgaaccccgtcatcgttactcagcaatatgcgtttnnnnnnnnnnnttagtccccttcaaggagcaatacagacacaacaataatgataaaaatggcgaaaaacgacgctgttatcatcagcgcttcaagaaacggtggatcgtacatnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|&WKKM;v(C`SHQZxX7+2aVT_NZ zSN6ez1nLaM|HzpCW7t^E%4dPJf8;IhiUN3#ngcG{;NVzUtAP=!n36^)^p?Za z3|MOyvnZNywJ}X`2(r+F0Lu{mUOC{t>Oz1|L{OZ|@5I`qa^-RUMCd21yF5U2Vaw7dliWf@BgoEwR(+V2@#Vd)lto{^ltN^MjTQE$%#7 zWbxtU$xOI@lU(1v7R`~3U!navY>ss-^$y!*u7Qn%6_epV&lqg zD^E`YCub_hH|&_U3p&fCP@;5#)G_a85lp{z?sm`a)fb~ja)c6~a@I}Z-;U{ycr-V= z)8Wu48t-3OGh5YVaW-K_2;oRTxun&N_dHrs5fu9Mm*e5wZhdqF^u7FIZ%4BPwdrKbO4$3qh8(d^1}>wRe8l=07Woud)bF~Y$>u5ZC-L{a~sr5 z?FytSSpKPzcZgmPf$dEIv@|!~stwMF4aO^-0U4|w|Ij^RL_W5nT#wp^qq#`^P~_1P zbPBDJwH*6Ba_!c}c$h z&3eC{oQa7oF3!gk8c;p{2KRsMnV61B%N>^6OEXIC3U{L}S6{9e@LF*-Xgl;s_?eV$ z-lz@BYL%`W(R!(xd%zPLHZn7l{%ttk`#bR{bjo|iCG5_LGVP|5U+JMGok)AqF98E> 
z&VSQ`m44cn@wI0sJXZgwlzf6{kO8~RqpbA ze;qNZP0)5@X`lfSg2d0IM&(6IqfeKkRv6BDz^U+?4t9cQZ^*HK=RjA>r_Pp<;0q&f z&JP{jEW3yeC7w1qv-sYl zx|B%11VMtHgDIARvf&A|CE%xDC{FM=`YhMATo5w(Bv4zbI!irQQ!8T3+1b45kX8a@ z@-+vdKQ4r3rogk$()G6i=53s_C9#*etJFOFeXxJ7&1g!eJtHVh6!@;%PVgg$wcAIV zmE2VgrtP8ulYpyDPd)CGIMS4+IN5dm4K%&844KV@jbU!pVb=V)f1Y0h1eo=dw2NG! zA=05pNlI^M8wa~Cn+5XwknL+Z-K4%_9M!9QTJw+{uKVUV#Lo!XgrD!?PWw&`(e|X< zm0z)>h8N43a1@WaB*3!o0lZ|pcQn+!*^K7CimqG$FS+%Kxi6k6AD$+Enms4FQPD^F zDH1l`VHCpONw?c|UKZgs9Nt@^&shjyxHbKiXv5i0gb|K*n2INNV{q2K?`WRQrqC^? z$cztPr8SW~1vAP@?m--6^N!ZAC|tw*dZIB^z|CI;2{Rc_r|r*2B0$M>#d5TU$-a4` zl;g!_EC}GJJ4=yFk8M*1gG#Yf|`6Q?hgvlC@}xI3vp z>5Ta?2EGage0ig-S>eog350_b%XhTNQN*h^F&l(zVtGk(uHn4r|29Tg!dj8MD%J~6 z&i6OxTia~M&lZCneg0Kr*Q8|(PN^7AA^P#mhTmI!A>Yad&9 zkh_{`JH6$MzhR4qWD8Q@IosNs|K>`jEKe)4)JJM-N(Yt1`{fJAQyed;q_xP!;`5`v zo>A60US~X{=9T!HDv2@3YkM+X0 z#FreBC$%Ft2!3|;B6@+8=cGh8@1ZZ&*5+IGglxV4pyBfosSw>zcpIn1$or7puI)Rz z=yLMt^FsdZ(?<=gf6lbKjR2QFZHch~?W*`!N0_M(%FcgAf(;Q!*k>ng_nV`@@-avO zrc7tMD09$mzAvvJaP&%0HeYu?81-YFr6M69s;^>Kr%w~@(2tHyb3nTCGi2smR^ATO zcH=L&1GXJ$a6gt)Y%qYvH0uP)!d7gH#l_*RS8x3h1R-*vH*;?a^)O(|7s(fUrwc?UQN_22g_*ZSuP=RBH7! 
zkbSr!+DAarUKk)gG!OVykD{zTv%?dWVhg5lAhA87x}MUFdq-7Mgfj@BZOY8ASM#{H z6=!QjS;GIu5_wwu*FIJx2-!}C1`z|2jaQs@$u5*l@_aUuHCyi`)y@lAklJA&-F>=h zm1c&`BK%%mG*iT24Z6z1rn!HmW=CU)^Lxs6657!-uy7`?P(4ru~NR z=6aM+5tzi?T3~G{wQi zg)V7}%(5>9C)*Dd@3DyqARWz>d;er!ASOkN#^r&%sjoRFae?*AgR}X6H55GAe(625 zG%n5jBCC)J-v2pQvA7jnGz*|k>s}w`IVXUxkmJtDN0uRln$;@pP*3PA-h(m;v8mXU z#!Y)@_T-G1GXQf)D`kofI*3cRpyK(C3t2=QsG8hDL*9z@hW7#ec&nPmXKzx|z3Urz zIqL@+|AaXF*UP~SM$p8*4DF#9pR##sy&vyV?KGly+Ucsbie%Tczy3jWuVQB-=-l8w zRWH^uUC~4Q$dh=U4R_aUg{PWLd;8U#%^7X_DLwP1?9-In>sH6PRl?hL(#6n`xf(2q z?P_XzXGN4L>U0rhPTQ|5(RclLaL&;i-;ViOch%!y-1HA=!JqX7NJ!P7gQ1ieP@2D3M{(270%}SV3N}u%sZ=>@Ca}>}%fdxeiuV~TCucWje=dHi@Yrl0 z!OjDV!|@&kU6^{!2wEZ9tO5uxejM1v8+HBNqm@eJIL&W2znA<0*=q@hUr92;j1ubf z3FGlKa$hKLvQ=|brM$UM8D%0e>w`?!r$br;6wGgH%!PSz&%Wzhb zRU-bjbH)Cfz}STNPe;!P^P2tt?u}>toRO7rZFp)kCB)6i(X}9ELOI}cw_=k3YNNx0 zMhGLwwGGcQjnn6l%J^$|FZ}HzQ!L1*>9u!ILomNkZ)j6y@n4b_IoRHJP{y>(5TLJ2 zrbd%POw99>H!(;V zH&yY7iEX>{@LRjL)zdPBstpD7nN85qGB%==vLabVV#ccPJ>mKNRGkw7lqsc{vsp}Z z+{hsT>Ns{Tm7`NvsV(siti>lAz(T&PjFjL`HGe4HYA+ViNy++*cC(!&`yn4w2pUM3 zUR%hOA4VOBj8yOf`M%z%W+633MxKPU5h7p(5^xP=tRlXVKtL8K!kME}d~5#6Bxm7@ zBBXA6PI^2Rj^dtc5h*y?k?ja%S8eIj#+HSBIYfYLg|Ov^hR;cy8_-*qgaX9Yc5#BzMW+J$%U)uGnEO=#Rif+H+k^Bw0bd^7_l*cfu$1XZqA3g^KPOq;!WqS9@p zJ=uVde#StNu8Fjrnf##9|0*BVtuCNdGt^bv_CpQ|znzo7XMT#T_?j2|a58Iy{Kg_| z8b9&}f1MQM#SwR}H~yb#r-lYtIbQ;#{`+^7FIVv!c%HE#(HHx3*vkYx9S0=QWvDk;FDQ~F`&dK_f&&Itc)d55 z$C#<>o{+-xXu@Ikn(e9f9ZL-{#xB|5k7twRK|k9%5b{OkDsXw47?>0WBYcLRo@W$l z--&E?ILFI^5{8TVBZFM8NYag@WLFt-`V+Ch?Ho2D?hO}bIQl!TER7GP(k_XHz3)2l zHMa(`m$9N@8labY%7{{o&Q>&OXqFDfR-beNql|z%3A&|;E<7w5(PFrZ}gJhE)sn}E|r2XFLk#j=&MesoRv4QQ*@2Nv6zfR8W&s0 zJ(Ls#H)q!<;33S1qn>FBGBkC-dWYVI`7YW65x|E7z97&Ja^14m9B@R?}miVkW)D=AFkkVu=0Ud>$TE!h?{lULA76-C_$B(y`=NATZ; zRvv}ujHUUfIWcUp$>_0UN47ML;v~V^I~V7azhHRKWuEG!M?&>sE}~ z^xF7^Jy$)$6QZ z^-bfP`L2}=(GYFdrfuA?y@w>OHF0G%vYSRH>I?_=W}ND#l44x<#)RlJr9I25*eGdV z0DbDRpfwI3BYDOIh4^|*b!C|m?j9}!TE!r!V&1aBwoCa93~8e>K8q^CLvu|OU^f!VNA+vZ 
zNui?SXt5uYb1^57r`OF{)qU(!hNFs#Amv5pJbVPph@s;v?Y*Uat?dqMs(#1y%t&2X zg?$a3P@xteCc``S5Tk@8dGN3K!5^%{Khd^LjaJ!8Y7IWr*bY7U?aRG|@izR6n3h&_ z`;(CX$VGi~?RAUM?oDPh8R;Z|=L3mB1oxNf@fO5|$?25Jj)zr|7#z#~MJpfZZU@fQ zuSNE$b3>zc|)!h1|5! zfRbL6C@XtQOkdZ4a|TuEPU^S%_doP4M_t531?P`UKN%nL)A+O!vdcKBfsuDs%o_rF zF44`^k9=NY=emN^_02-zOr$}NNI>)RuO^od6Ctiqovf^8%*!Q`$9CQA+brQ%XEAOK zF3U39X%c4aAdir(uQ-!rL%yt}Uv)04i?=*S`*b2+7d|{#%ZzsE7VL|D@+;=dbd}xJ ziM&NTNn|RidOqkP2R&|(TAdTnCVdHT{;~DP(BTF7zDczEVVS0J_^>|R#tjxYgGJoC zs{gPYMuUaycF*Q}E+t#AxNHcw`htu++?0aBK*KP~Wgg^G!4;-3JAkd(Uf;1@O3Jq%8M-9pb sqd_@*<37FyMHtecyvj5ve|HTkC_sY>{pNxHr$I%0%SHXIL6JiKA5sT1;s5{u literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ntf b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ntf new file mode 100755 index 0000000000000000000000000000000000000000..005ac416d78d808991db961db8e272a0664078b1 GIT binary patch literal 16384 zcmeI&O=`kW5CGt5>cWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G8/filtered.hsps.txt b/tests/test_data/outputs/extract/G8/filtered.hsps.txt new file mode 100755 index 0000000..7b27e77 --- /dev/null +++ b/tests/test_data/outputs/extract/G8/filtered.hsps.txt @@ -0,0 +1,21 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete 
is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22872 0 101 22860 22759 102 0 100.0 100 100 minus 1.84e-51 189 locus_1 22759 22860 True True True False False False True False False False False +9 0 762 22872 3 761 22747 21986 759 0 100.0 99 99 minus 0.0 1402 locus_10 21986 22747 False True False False False False True False True True False +10 0 858 22872 0 857 21974 21117 858 0 100.0 100 100 minus 0.0 1585 locus_11 21117 21974 True True True False False False True False False False False +11 0 972 22872 0 971 21105 20134 972 0 100.0 100 100 minus 0.0 1796 locus_12 20134 21105 True True True False False False True False False False False +12 0 1098 22872 0 1097 20122 19025 1098 0 100.0 100 100 minus 0.0 2028 locus_13 19025 20122 True True True False False False True False False False False +13 0 1281 22872 0 1280 19013 17733 1281 0 100.0 100 100 minus 0.0 2366 locus_14 17733 19013 True True True False False False True False False False False +14 0 1434 22872 0 1433 17721 16288 1434 0 100.0 100 100 minus 0.0 2649 locus_15 16288 17721 True True True False False False True False False False False +15 0 1464 22872 0 1463 16276 14813 1464 0 100.0 100 100 minus 0.0 2704 locus_16 14813 16276 True True True False False False True False False False False +16 0 1836 22872 0 1835 14801 12966 1836 0 100.0 100 100 minus 0.0 3391 locus_17 12966 14801 True True True False False False True False False False False +17 0 1914 22872 0 1913 12954 11041 1914 0 100.0 100 100 minus 0.0 3535 locus_18 11041 12954 True True True False False False True False False False False +18 0 2037 22872 0 2036 11029 8993 2037 0 100.0 100 100 minus 0.0 3762 locus_19 8993 11029 True True True False False False True False False False False +1 0 285 22872 0 284 8981 8697 285 0 100.0 100 100 minus 1.05e-152 527 locus_2 8697 8981 True True True False False False True False False False False +19 0 4935 22872 0 4934 8685 3751 4935 0 
100.0 100 100 minus 0.0 9114 locus_20 3751 8685 True True True False False False True False False False False +2 0 327 22872 0 326 3739 3413 327 0 100.0 100 100 minus 5.47e-176 604 locus_3 3413 3739 True True True False False False True False False False False +3 0 417 22872 0 416 3401 2985 417 0 100.0 100 100 minus 0.0 771 locus_4 2985 3401 True True True False False False True False False False False +4 0 444 22872 0 443 2973 2530 444 0 100.0 100 100 minus 0.0 821 locus_5 2530 2973 True True True False False False True False False False False +5 0 543 22872 0 542 2518 1976 543 0 100.0 100 100 minus 0.0 1003 locus_6 1976 2518 True True True False False False True False False False False +6 0 606 22872 0 605 1964 1359 606 0 100.0 100 100 minus 0.0 1120 locus_7 1359 1964 True True True False False False True False False False False +7 0 642 22872 0 641 1347 706 642 0 100.0 100 100 minus 0.0 1186 locus_8 706 1347 True True True False False False True False False False False +8 0 684 22872 0 683 694 11 684 0 100.0 100 100 minus 0.0 1264 locus_9 11 694 True True True False False False True False False False False diff --git a/tests/test_data/outputs/extract/G8/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G8/processed.extracted.seqs.fasta new file mode 100755 index 0000000..28abcd4 --- /dev/null +++ b/tests/test_data/outputs/extract/G8/processed.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 
+aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G8/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G8/raw.extracted.seqs.fasta new file mode 100755 index 0000000..28abcd4 --- /dev/null +++ b/tests/test_data/outputs/extract/G8/raw.extracted.seqs.fasta @@ -0,0 +1,40 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_10:9:0:1 +aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>locus_11:10:0:2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>locus_20:19:0:12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>locus_5:4:0:15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>locus_8:7:0:18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G8/seq_data.txt b/tests/test_data/outputs/extract/G8/seq_data.txt new file mode 100755 index 0000000..89af344 --- /dev/null +++ b/tests/test_data/outputs/extract/G8/seq_data.txt @@ -0,0 +1,21 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 22759 22861 22860 22759 100.0 100 189 True False True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_10 9 762 21986 22748 22747 21986 100.0 99 1402 True False False False False False True True False aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa aaa taa True False False +2 0 locus_11 10 858 21117 21975 21974 21117 100.0 100 1585 True False True False False False False False False 
gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +3 0 locus_12 11 972 20134 21106 21105 20134 100.0 100 1796 True False True False False False False False False atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +4 0 
locus_13 12 1098 19025 20123 20122 19025 100.0 100 2028 True False True False False False False False False atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +5 0 locus_14 13 1281 17733 19014 19013 17733 100.0 100 2366 True False True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +6 0 locus_15 14 1434 16288 17722 17721 16288 100.0 100 2649 True False True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +7 0 locus_16 15 1464 14813 16277 16276 14813 100.0 100 2704 True False True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa atg taa True True True +8 0 locus_17 16 1836 12966 14802 14801 12966 100.0 100 3391 True False True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +9 0 locus_18 17 1914 11041 12955 12954 11041 100.0 100 3535 True False True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +10 0 locus_19 18 2037 8993 11030 11029 8993 100.0 100 3762 
True False True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagattt
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +11 0 locus_2 1 285 8697 8982 8981 8697 100.0 100 527 True False True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa atg taa True True True +12 0 locus_20 19 4935 3751 8686 8685 3751 100.0 100 9114 True False True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcat
gtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgc
ccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +13 0 locus_3 2 327 3413 3740 3739 3413 100.0 100 604 True False True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +14 0 locus_4 3 417 2985 3402 3401 2985 100.0 100 771 True False True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ctg taa True True True +15 0 locus_5 4 444 2530 2974 2973 2530 100.0 100 821 True False True False False False False False False atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +16 0 locus_6 5 543 1976 2519 2518 1976 100.0 100 1003 True False True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +17 0 locus_7 6 606 1359 1965 1964 1359 100.0 100 1120 True False True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag gtg tag True True True +18 0 locus_8 7 642 706 1348 1347 706 100.0 100 1186 True False True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +19 0 locus_9 8 684 11 695 694 11 100.0 100 1264 True False True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/extract/G9/blast/hsps.txt b/tests/test_data/outputs/extract/G9/blast/hsps.txt new file mode 100755 index 0000000..647b11a --- /dev/null +++ b/tests/test_data/outputs/extract/G9/blast/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 22584 1 102 12 113 102 0 100.000 100 100 plus 1.82e-51 189 +1 0 285 22584 1 285 13603 13887 285 17 94.035 100 100 plus 2.33e-124 433 +2 0 327 22584 1 327 18845 19171 327 0 100.000 100 100 plus 5.40e-176 604 +3 0 417 22584 1 417 19183 19599 417 11 97.362 100 100 plus 
0.0 710 +4 0 444 22584 1 444 19611 20054 444 15 96.622 100 100 plus 0.0 737 +5 0 543 22584 1 543 20066 20608 543 0 100.000 100 100 plus 0.0 1003 +6 0 606 22584 1 606 20620 21225 606 15 97.525 100 100 plus 0.0 1037 +7 0 642 22584 1 642 21237 21878 642 0 100.000 100 100 plus 0.0 1186 +8 0 684 22584 1 684 21890 22573 684 0 100.000 100 100 plus 0.0 1264 +9 0 762 22584 1 474 125 598 474 0 100.000 62 62 plus 0.0 876 +10 0 858 22584 1 858 610 1467 858 19 97.786 100 100 plus 0.0 1480 +11 0 972 22584 1 972 1479 2450 972 0 100.000 100 100 plus 0.0 1796 +12 0 1098 22584 1 1098 2462 3559 1098 0 100.000 100 100 plus 0.0 2028 +13 0 1281 22584 1 1281 3571 4851 1281 11 99.141 100 100 plus 0.0 2305 +14 0 1434 22584 1 1434 4863 6296 1434 0 100.000 100 100 plus 0.0 2649 +15 0 1464 22584 1 1464 6308 7771 1464 15 98.975 100 100 plus 0.0 2621 +16 0 1836 22584 1 1836 7783 9618 1836 0 100.000 100 100 plus 0.0 3391 +17 0 1914 22584 1 1914 9630 11543 1914 0 100.000 100 100 plus 0.0 3535 +18 0 2037 22584 1 2037 11555 13591 2037 16 99.215 100 100 plus 0.0 3674 +19 0 4935 22584 1 4935 13899 18833 4935 0 100.000 100 100 plus 0.0 9114 diff --git a/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta b/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta new file mode 100755 index 0000000..ee2376b --- /dev/null +++ b/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta @@ -0,0 +1,2 @@ +>0 
+nnnnnnnnnnnatgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaannnnnnnnnnnatgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgnnnnnnnnnnngtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactgannnnnnnnnnnatgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctt
tttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatgannnnnnnnnnnatgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctgannnnnnnnnnnttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccacctt
gaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctagnnnnnnnnnnngtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgc
ccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatgannnnnnnnnnnatgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataannnnnnnnnnnatgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataa
gcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataannnnnnnnnnnatgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgt
tggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtagnnnnnnnnnnnatgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgt
ggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaannnnnnnnnnnatgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaannnnnnnnnnnatgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagc
cgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaa
ccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacgg
caatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaannnnnnnnnnnatggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaannnnnnnnnnnctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaannnnnnnnnnnatgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaa
tttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataannnnnnnnnnnatgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaannnnnnnnnnngtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctagnnnnnnnnnnnatgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatgannnnnnnnnnnatgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatga
gtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctagnnnnnnnnnnn diff --git a/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.ndb new file mode 100755 index 0000000000000000000000000000000000000000..5fd6f7085890c929e9fa647574a304e6e7c1d317 GIT binary patch literal 20480 zcmeI&O-jR15CGt@3T7h_@eblfJc3sVVx^$TqQ!Nu)s6HdF1xii(B|hgQK1OcMd8aL zZ(j2%Z#3(Lfo-gxxPK5vg0t5&UAV7cs0RjXF5cpex+fBMroW@J_R2I(m+4ypqiHV!bpVKM|-$ z5Jf;QFC_^mDCSj~r(kHHqhMrUWTK#8XlP|n*xTJGrbJBLUeKW(3r;hBC+?8V1rrEdY zOVKAaR4(Djr4F6WNlGQ7H1~~Cl8#&adY|Wg|9GG0{r7!eK;dWoj(&*yHj~A7BsQ!m zLRX&mw7!40_)2->dku7wAo5ARMbEwY45mExW1*TX+4;rM+dJYe#tiBR*|Oi~Pq0eg zvIaNLrsaFm-BOG^uZT^3;=8A~>yNFE2mA7uxqY=A()uh>QihUY^2~(o3uA-c-z& zf8zXc%HlZDd(?tw*LWICo7nzQd^KvKv$FOLX2hDCpfjCr96J=quGAEDw1(|Y);m?+ zA8$3l`8v>^Z+xZZvtX{53e~c7F84Nnu8fHY#wZaBzy@8HbLMGU-{s-PKNx4O<5oY? 
zUXDOe-M##2UY+#OEL+`JUn+Y>Qw_^_!tnPYq3#R++BM=oDLALF6Hyh_8dL?STk2bR zGtNFDU1`&}FahIQK<%4uCuSPIA~#|j;F%iC(QR2}ERFqWtEGHr6k8~n_gsouSYh)< z;q&3tpI9=8hPILsX>Yi#*G=!p!J~Vw2}aR`{!{+k4rP3%bB3Pd?Wez-N}e1yerqS6 zoR1v4EB-hY^v|a0zqZH;i^2yCraVNtWx&v?WqY5(E)>H`Y^ki5)N%hmk)I|E1a-+;DlhehP%u`Ed&GYhL?Z_n72| zG<&&os#rI9-NBU)(7n@VEB+~2n5?`LuPrRHI(=#P?+eO>m!Cfgy6&1)89(RXK7gMF zCVYnXyCyqr4fgY70G>PQviN^&57EqB5*W-#NO1)9dWe_u+|Sh1jYBlJpIho{Vwd`I zT7wt^Hp60X!t|NN-$?!8uJ=F%xbj!nQ(Md9n3Qnc8q25gXzXZ zxn=n0*u=-o_R_|EYn4JmFmo_x_JQ3xY=$J?%IXy;zU(=p9k^`0c; zJYM)(?fEZRnYz+4E(g@=Q;MBS?M-uZuyHq_XX8Z|mUL04&Sw57!Dmh=XH`O-_RNTGU0>EOF((&gf_VB(vp=~)WqvA%P^r8I6*J}zrzkJn} zKen}HoveCG<}_f}lSVt`;`R%Sh;^IaRqNjuE^Lru6EtaRAt!Em;kjt!#l>wB9hGcZ z(iA&KXHed{|J}3tvn71{aOSO6(gFh8FdNrkmH4_|4qYklOC#PYB zcT3?jRu}|EpItYC#f*?pp+dAzY}segjWm~ZVkp3iBWs2>2QtB`Cu|#H&je2Oi)GiK zj~AXlpfvpt&F46*^c|j!qeJTP6>KMOTW-8VE^kuQ+&>}p8f@C8;pQOMj(VGEGknS1 z@aJRe8);!C=#Y@_ck zmh7_K7Q90$WlKSoB?N3@D*WqOpX>Jz9k^Dyp%dhNq4_v*AG*r}MU|OS+KS?EUL6WQ zO#pESbBnk|d$aGWpyy*Kg)tHk`HTmXiL({iEI5}GM;R`ig=1X?*}_ZA;)N=LB&2O) zblAf{G6*%@gN>QN7gbubuM?qxQ;_Wu*jyC%U#XZPCiSfAwiT{o);yz#a*T{T8qt*2 zXMz+C47Gi<3^j`qhb1yJxrU@d{Xkzt?7@_^Yc9MD!_Dnq^ zXs|lxl48?L899{jUk6y8q(LqAlv;E9@wI^f-x(ne20cxOn#0rOxza-5J$FUO*z}8fwB7cz zX;uwjZjlVVq84K zFni!X>4(QtxsHCF7jk}&jK&$Q0L)9$HQ`P!?roQUx=mi4z&?Y+hqMcJmCX}v*gftd zF#HL4hyCXVET}7rP>7fR?nr(Eo^agLjc>o9?B1#Bd3La1&oVm?psn`r^c2KF8(HrR zmZ3fG&wj07ZlM+UihKvDe2bsUI}h1n=AjRxU>z)W`jtl7h)yAh0zNxi8i0#E_|vH+OA(O%67GuE~>7zY6B;`F8UC{SJQMFC>^=3%tjU}WL@6n z!Kl`M_UB#kGPq*darGSIgNKM}gQ$ zRP^AfXN)?dHOjRDh5chS*$n3JRmA`(o9InF*~w?5V{1*%;ev2uk-RAekwPRE;(`g4 zIxG=ni8S|*hyn@=$N64?t3J3PG9xzj`PylTqF_&X-vWxJw4%<7#a{lxnl34ZNwZ%^ zdp3St+~vnSOmK4;u6E7crlyu{UOP){3k=Bo5{Wf zI52t5#18X?GMF1{U;UW;0aqzy*SMt+=ow|vLb$LI#AKkvn~(O-Ehy_6y34e2JJ*wT zjCouN&kWy3m^kp4^Z4|Q0Nw9tb>_%5=SQ^{=@FKunc(|^ML$T12eE=G{1An8Z}O$k zb?iIt#Ak0Dx`LlbFr2M%WS`mmZN&@qpY{fe6Wn$kbKT3y;NF0+H~O)q3+if!Fb+OG(PtpNYk9>P&GQOKCWG%@d$YY>zFY8$ z!cI2?z7(SpMm0~j`s@)|E+m7DhWtNVm^ZtwutBxI(A{Y4CP_WqcD&VIgq`9VylDDj 
zS&81QZ1z^lBa(t5QwEgBuPLcd!a7`rP#?+ogr2CTQxwT5G~G1?Mje2SyKYaP)ShEE z<2$mxD~lu_LG&ibFyiOloE|MaJ9a<%0-42~ht0po6e6dR6i;lQaD1aw^61 z--aC{%P~~ci7RzR0=xu}YdWq66WT{3wHsNuU*U6w#68E(ZBR(oNC#f%cyEjiAK6;f)@o4GH( zaJk(*@=Ok1yRudM00=)`7cCT8wV%bY(eGzf$5P2UIjg;a2Y1enTha}X;~u`W&#?Hs z3l(BDXFBTQCf3e3(abh6c`6{8hkR-Xr`U_n+?>so1ty7l(c7hMbpj1lCqckK+jnDX zm*8zsUoF|=Xj(Zw!K)NnSNbUX`xivjqnM{gDm`oT#l4fKx^;evFKTSh8DgDdpA9S- zlISrlq_pno-&Q4ipGXrL+|L*$+jm*r)AR}ktV7&b>@g|i8S@o27>bMx>>l$+dl_e+ zi!jj_FU+9pZY5^2jLuT_B}ocW-abi)oi!EqYVFLp`#wN_^{LF^+CQ{Em2sd?usSJD>$n=S3`aCfvxPx~+1f>>fjJMBve5iJr6 zqJoc9p3#7iDyjI};4mf{A*x78IjpEv#Y z<(rNvLz>}ax@XK#n-hn-@YawmGXO_4CXvJU>E3pTze8oM7kW$Ci)1J2{_sGY*t&A$ zQT{+eA0w*shXKer4W}8MZUMr(uKU<=}PmMLrgVd#vDFTPwE& z^7`N4<|`oghVviaBk;uvqkTbdpM1H9D|j7AkziV0`IkL}^CcXBb+JH)i&gEurL*PA zI(}SKyq#Ss=8O|pIY(A?W0*M=GopIf-&FEoapL@x@1bVBG(wEjljWbx76AJZm{)dA zxwmhb-LttC{bdp(=;Y}O(q75P6EsADE<&k60|7pfTMfU$b}H&U-P|Qdhbzs>#-?$* z-GWTUm#8uM-oLKdOCf`1DotHolc7E0FOp*_qZti0>{C<~|kEiR@${nTs$ zQ`FznhVzHF{{~>I``V0}DuO}0;*2+>TUCnJhk_Vw1%M5ts?6Jh6evNyMvP`}Dn;dq z#zjFp{9x*cMKAGQ8ze4yq~6wnw|IoIWfZq~lc!es*sSh)hwQ`Yi_T1#!<6gK6-{4# zA(qJOKj0*r;h^%pC&+Wz=j_*Dh?FZ^mC{l3V>oGy22-v>2yjsdNer^~NBEM^~Tu$j+Fk@nn>%h*t`i z6sV?O!!i>NDiqsi5x_Mlc02#81e&~37dB*S8gY=7<-=kYHMc;zmv?E4{W<;+v$E;J z{<*=v+c1*Zrp}ZYl4zm$yIH(gYm)(x=qOZcv773zsd-@*?T^YA5jb|Mefhg^TM^_J zuOJ4_Zr{lJfGyP#pKwQ6n-qCU+wQp7an(lS@YuPRXo^#S_=Haoi{nk9<2?X23^qHn z-6MWVv=JsavTGJgraZK6#B4oMfhorE3txqTvA>b>AJsFj7Ja0Ydq{W_$9dac%IV17 zmPh$c;YGLC#3w4c$jEKAjA)G~4}d(0;S@e{(ocn7nnPq?V|Hvx-g>bAzE%J5%E#fBg<=C;bk%}bVRmgw zAMvdfrVq5gi?j}QC~Ugb3}IoN1?&0@V#XS+t@_IS1oGYV-#gcOCc)!h$BQWy7B(8i zf-T7d+A56$LdS?>P9Yn6#NmHCOk4+MlvbF(YBLrR{%7hBuPlb>Z#;JssN8PiIcmGA zk3MPsu4GB)v8fSn!{#X{&WWl8Y)@BpqBx~+QF$61BY%i>P)`7bF$`))&)3~ejhl%&cuZp5{=8e`AZ~O6*#-E4;c^MHNO1p8C-& z*AZgL=lFUT3TpXD?#%VIm%mxlnUPu!-$m533F zw2Qb%)wsV7nk;iIkfY|iy(8PD>kzM%@k=tJq<+k;rzWskfIg{qlPCh+lS~|HNjJ8G zqEWxYh~xl{ABCX4gVAQWDeh%;%+iJm01U^2lGOsy2+44*@NGv-63B8dh1h~KU^nCN 
zu;gbkM2IIyLcv4T=DVv&ws_I`kze^5n>4keS{r(-HWq!jNFe$wt4kxS+WOfB9wUPR z0e7t1vjup=<=*EtZmp3oNzb&fJKDUTMI2pIoF>HjSqJUC*~5aEM?1#~uHW^tx@7S2 z=HApEzTcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nto b/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nto new file mode 100755 index 0000000000000000000000000000000000000000..20d5cb86e6dff1f3684dc229a358a2ea697cecfb GIT binary patch literal 8 KcmZQ%fB*mh5C8%I literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G9/filtered.hsps.txt b/tests/test_data/outputs/extract/G9/filtered.hsps.txt new file mode 100755 index 0000000..cb46930 --- /dev/null +++ b/tests/test_data/outputs/extract/G9/filtered.hsps.txt @@ -0,0 +1,20 @@ +qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore locus_name ext_start ext_end is_5prime_complete is_3prime_complete is_complete is_5prime_boundary is_3prime_boundary is_on_boundary reverse complement is_extended is_5p_extended is_3p_extended +0 0 102 22584 0 101 11 112 102 0 100.0 100 100 plus 1.82e-51 189 locus_1 11 112 True True True False False False False True False False False +10 0 858 22584 0 857 609 1466 858 19 97.786 100 100 plus 0.0 1480 locus_11 609 1466 True True True False False False False True False False False +11 0 972 22584 0 971 1478 2449 972 0 100.0 100 100 plus 0.0 1796 locus_12 1478 2449 True True True False False False False True False False False +12 0 1098 22584 0 1097 2461 3558 1098 0 100.0 100 100 plus 0.0 2028 locus_13 2461 3558 True True True False False False False True False False False +13 0 1281 22584 0 1280 3570 4850 1281 11 99.141 100 100 plus 0.0 2305 locus_14 3570 4850 True True 
True False False False False True False False False +14 0 1434 22584 0 1433 4862 6295 1434 0 100.0 100 100 plus 0.0 2649 locus_15 4862 6295 True True True False False False False True False False False +15 0 1464 22584 0 1463 6307 7770 1464 15 98.975 100 100 plus 0.0 2621 locus_16 6307 7770 True True True False False False False True False False False +16 0 1836 22584 0 1835 7782 9617 1836 0 100.0 100 100 plus 0.0 3391 locus_17 7782 9617 True True True False False False False True False False False +17 0 1914 22584 0 1913 9629 11542 1914 0 100.0 100 100 plus 0.0 3535 locus_18 9629 11542 True True True False False False False True False False False +18 0 2037 22584 0 2036 11554 13590 2037 16 99.215 100 100 plus 0.0 3674 locus_19 11554 13590 True True True False False False False True False False False +1 0 285 22584 0 284 13602 13886 285 17 94.035 100 100 plus 2.33e-124 433 locus_2 13602 13886 True True True False False False False True False False False +19 0 4935 22584 0 4934 13898 18832 4935 0 100.0 100 100 plus 0.0 9114 locus_20 13898 18832 True True True False False False False True False False False +2 0 327 22584 0 326 18844 19170 327 0 100.0 100 100 plus 5.4e-176 604 locus_3 18844 19170 True True True False False False False True False False False +3 0 417 22584 0 416 19182 19598 417 11 97.362 100 100 plus 0.0 710 locus_4 19182 19598 True True True False False False False True False False False +4 0 444 22584 0 443 19610 20053 444 15 96.622 100 100 plus 0.0 737 locus_5 19610 20053 True True True False False False False True False False False +5 0 543 22584 0 542 20065 20607 543 0 100.0 100 100 plus 0.0 1003 locus_6 20065 20607 True True True False False False False True False False False +6 0 606 22584 0 605 20619 21224 606 15 97.525 100 100 plus 0.0 1037 locus_7 20619 21224 True True True False False False False True False False False +7 0 642 22584 0 641 21236 21877 642 0 100.0 100 100 plus 0.0 1186 locus_8 21236 21877 True True True False False False 
False True False False False +8 0 684 22584 0 683 21889 22572 684 0 100.0 100 100 plus 0.0 1264 locus_9 21889 22572 True True True False False False False True False False False diff --git a/tests/test_data/outputs/extract/G9/processed.extracted.seqs.fasta b/tests/test_data/outputs/extract/G9/processed.extracted.seqs.fasta new file mode 100755 index 0000000..b11d278 --- /dev/null +++ b/tests/test_data/outputs/extract/G9/processed.extracted.seqs.fasta @@ -0,0 +1,38 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_11:10:0:1 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:13 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:14 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G9/raw.extracted.seqs.fasta b/tests/test_data/outputs/extract/G9/raw.extracted.seqs.fasta new file mode 100755 index 0000000..b11d278 --- /dev/null +++ b/tests/test_data/outputs/extract/G9/raw.extracted.seqs.fasta @@ -0,0 +1,38 @@ +>locus_1:0:0:0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>locus_11:10:0:1 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>locus_12:11:0:2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>locus_13:12:0:3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccgga
tgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>locus_14:13:0:4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>locus_15:14:0:5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>locus_16:15:0:6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>locus_17:16:0:7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>locus_18:17:0:8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>locus_19:18:0:9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>locus_2:1:0:10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>locus_20:19:0:11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcg
cagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcg
ccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>locus_3:2:0:12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>locus_4:3:0:13 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>locus_5:4:0:14 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>locus_6:5:0:15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>locus_7:6:0:16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>locus_8:7:0:17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>locus_9:8:0:18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/extract/G9/seq_data.txt b/tests/test_data/outputs/extract/G9/seq_data.txt new file mode 100755 index 0000000..216d0ab --- /dev/null +++ b/tests/test_data/outputs/extract/G9/seq_data.txt @@ -0,0 +1,20 @@ +id seqid locus_name query_id qlen start end sub_start sub_ent ident qcovs bitscore reverse complement is_complete is_trunc fivep_trunc threep_trunc is_extended is_5p_extended is_3p_extended seq start_codon stop_codon is_stop_valid is_start_valid is_cds_valid +0 0 locus_1 0 102 11 113 11 112 100.0 100 189 False True True False False False False False False atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa atg taa True True True +1 0 locus_11 10 858 609 1467 609 1466 97.786 100 1480 False True True False False False False False False gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga gtg tga True True True +2 0 locus_12 11 972 1478 2450 1478 2449 100.0 100 1796 False True True False False False False False False 
atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga atg tga True True True +3 0 locus_13 12 1098 2461 3559 2461 3558 100.0 100 2028 False True True False False False False False False 
atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga atg tga True True True +4 0 locus_14 13 1281 3570 4851 3570 4850 99.141 100 2305 False True True False False False False False False 
ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ttg tag True True True +5 0 locus_15 14 1434 4862 6296 4862 6295 100.0 100 2649 False True True False False False False False False 
gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga gtg tga True True True +6 0 locus_16 15 1464 6307 7771 6307 7770 98.975 100 2621 False True True False False False False False False 
atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa atg taa True True True +7 0 locus_17 16 1836 7782 9618 7782 9617 100.0 100 3391 False True True False False False False False False 
atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa atg taa True True True +8 0 locus_18 17 1914 9629 11543 9629 11542 100.0 100 3535 False True True False False False False False False 
atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag atg tag True True True +9 0 locus_19 18 2037 11554 13591 11554 13590 99.215 100 3674 
False True True False False False False False False atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaatta
ccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcgagtgggctttactttttgcttctttacggaatatctaa atg taa True True True +10 0 locus_2 1 285 13602 13887 13602 13886 94.035 100 433 False True True False False False False False False atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa atg taa True True True +11 0 locus_20 19 4935 13898 18833 13898 18832 100.0 100 9114 False True True False False False False False False atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggc
ttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggc
ctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa atg taa True True True +12 0 locus_3 2 327 18844 19171 18844 19170 100.0 100 604 False True True False False False False False False 
atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa atg taa True True True +13 0 locus_4 3 417 19182 19599 19182 19598 97.362 100 710 False True True False False False False False False ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ctg taa True True True +14 0 locus_5 4 444 19610 20054 19610 20053 96.622 100 737 False True True False False False False False False atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa atg taa True True True +15 0 locus_6 5 543 20065 20608 20065 20607 100.0 100 1003 False True True False False False False False False 
atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa atg taa True True True +16 0 locus_7 6 606 20619 21225 20619 21224 97.525 100 1037 False True True False False False False False False gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag gtg tag True True True +17 0 locus_8 7 642 21236 21878 21236 21877 100.0 100 1186 False True True False False False False False False 
atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga atg tga True True True +18 0 locus_9 8 684 21889 22573 21889 22572 100.0 100 1264 False True True False False False False False False atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag atg tag True True True diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.fasta b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.fasta new file mode 100755 index 0000000..39f119a --- /dev/null +++ b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>2 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>3 +ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>4 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>5 
+atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>6 +gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>7 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>8 
+atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag +>9 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>10 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>11 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>12 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>13 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>14 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>15 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>16 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>17 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>18 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>19 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaa
agcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgc
tgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ndb b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ndb new file mode 100755 index 0000000000000000000000000000000000000000..a9b411b3621a29a79d75d1c0da6f615196410c93 GIT binary patch literal 20480 zcmeI%K~94}6adgKnn>JWOz&Xac?7Rw6q6Y_2JE}A?R z%pYd>{61E{M5H5cgOBCs;v`+#jMwis?esDtziXqtr?39lyJ@tT_U}A%v@Rq-fB*pk z1PBlyK!5-N0t5~h82ax2=>Lc8L)6ur^`)orte^kfcla-qfdByl1PBlyK!5-N0t5&U zNd0$27L%A4%VJ(oAwYlt0RjXF5FkK+009C7{#M|&NteZWyw$Vn{<+*F&tv!qK1AJP z)BLoXMvU&CR!{YB?@yoGW1Fa{;wo0fO81{=jRXh~AV7cs0RjXF5FkL{2n8`+M{ky7LD7ATZ(!@Cx4FfuDdc!3W?g@FDm;_$l}r zd<6ageg^&sKK}2Op#dfopasq;Kod+UKpV^`KqFjGfL6Gq0L?I`0PS!^0UF|(0<^@< GkH8C-tr(mD literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nin b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nin new file mode 100755 index 0000000000000000000000000000000000000000..10e6f3db680b3ce612ca913c6c072fc64983b718 GIT binary patch 
literal 372 zcmb`C%L)Nu7=~YGjgV+IO8BFzm!Uhb-5Ih@F&bvCjaS3G zM80)2-@heuy^lzxP*O8CyD}+XB0l#oJb!sUA5YjZv*(e^Q-CbXIH7APY}09RzU`{l z<&}dsG+lKzlYPWM&y4B2FXB0LKEJQ|1(W}4Aaq}3LmU+NR^B|84%m$R^ehzNYTM94 zG{XCN+RD;!;LhCRp|tG}v_)(3ynrGNckk4A$poVw=C_y<1nmYhNie(1AA0F%_oeb6Y`l1D7@4{cnV2kZn#&eJfKB&|a~8kcvs&jP0r| z=}p!#h+f~gLP1ZSyIgg#)B(ZJG|d(Tvvx78wMmO6?+jK+Kcq!nU>SbYn2NvUx`+Ml zDG_mJD1Rzex2k?CwKfCc7N`|rwXXo1m>WrFw8Qu78xNsZ12H< z?=@Uu=q&JA)Z6vs@v8-Pg_k=nH|Lt%w=qRsVrA--!@X5ru5E!QN1w<)XRM|fZ1Wi2 znGHJh<#}tNd@)b+HfLatdK%uTws%b5VpG3te>AhCsXvvuCD_EMTXT6&Dqdd~t>@^( zURad}Ya)+FJHXu1yQ9o{lwiXXSF01wPuyO;$#;K}vUG*=W&$=V#Ur}pv9!&$G7heI z=vzt}%Pn=V|@v?l$-*M18t9 zq>DYU1~WiWn}3wye|T$1p&xS-!CAXg{-&j`y{NEq=k^Xv1l=qYL3r3s2EbO;{(1Jj z&)iV_j-2)I_jrL6vl^rEP@g$lb~L*hGCSlE(JjyikFCp#g~nH2!Rv>20m@wHNv$(~ z?1ameUA7GYRR_s=GVd^ue-goo1R?Z9lqvhFOIagHB&mRru_!WsBRlVXz_!htw>@!J z$P^b5lbU;jJV*?C$3M+ux+S>rL&%}EW`}A6Y*2Y=u3LWA-In-u!Xx>3Sm$ZE&DuPa z8o}I*6+6goC2TYNJ6+S*o9{9o7~DR-N;qTplYvd``l281w@uQDyb_lB!&04Rv9jPd zQ?7IA>wlS-93NJHS@v@6EySgg72BSx`@FmFpq)*yA=+9#g<1KTK1ML*UUAj2A(QT& zU(O^AOr_+vv9B42rA;~Uqv<$rr;!A-*83$S?riY=Y75B2iNARF{dy+xOI^tS!QAq^ zlIscVdrtxUfHwuwqzG@V%I@367sO3ybE(2#zg(h--cly^ETm+)6P&LayO;1xyb=cI z=!VgT3FOhNHTq~nkKjQ%lP_z2{iXUUNk9}&pmoHBDz z%`1(Z>8osb1E2bUcQN@ytkcZHHD6&=^TM!k-OVq!X-5&&hrRmCl(-8rEi-v#Twl%@ z)aq(0eUpHo=e44>0b_FTvAPjt`*q>n&fB>v&1TbD*qM6_1SH-X0$=rQncDj)FShY2 zwQiZF>40T-jra~>Qs)MXOi}_=BjqJHCyTV?I2eb2Xy+cEZ}=4UyBA!fh_cpm<7K9D zZe+=^X67Mne_sXkel(5lccYA*FoOwv;N~$Q`=0mjt$#>0i~E5@uvt5u#7D9=pqlz( zycUZilL<)V1-^0h*t1sI60=eSQ3UOXOQnX{McO8Ul|Eg$Kni8cMHkJv(WeKZdtc3Ya8xKx@z^nq5yKAKu>NZ zj$+A7q1^N{Jg@gl@$B2tLU6`pQ#_t{(-Jnckgv?^heA4di=>^KAKFO9%H7%%i&0mS z&Yj&9-|MGThzkXx8O^ctxfd?GsUCO)r+%KGZni~1RFE$n%==$hV;FWJ;*qMX;Y#Z$ zv#IF$c#%6{viat<${`L@h#JP&`phavW{o&T+0h}k2DFRJr$Ns5!6mwVE#c$Nu(RpL zHqW&nZ>`cce^;36R*N}r$JSWe`dP7?Qa&F9i-;Jjv2omL&RRcRuza>!dtZrQjvS$$ zt=%Kfw2Sp0{URg@7r*oM&P^CVeDFuM)A6=hX(ugJ?o2Q4-R7L9A!(*Dtk)m z@~1^!y;eo}BC$cGIXjpwm}-VuW_{oUBq7qPtn`dy1p31C{!;&4lBKsP8bp^3QjI2n 
z0sBBN$Yylxat1fZlT^7qx>KNUva0BT*P~I54Vlsn_c0`AJdUrn#q5!xcUE~7l<~w9 zJWj*5?^WcuBS$cAOV<`*o~Xc_E&rzwlG5I9s#`Zi?6q zZK1QVb(&@O=~24!8_HXdDC#p#`CPFQCrrVgZzbj-W;Pf^QDxJg`|1`kpT$RDRtklP z{a1oLu{uVcBS8u&likO)Q7f+wH@{A8B_cdk=O8R~V0^?+l>$5*IevYlr4j<5#MCB{ zhV!|6i@*;KHQCclJZwFja)-P_X0o2dd_;BU+d_K=3tWW()MEHdEHHyb(7JsVzg_C^ z3LrtjHb=8ffr9)y=E)}*d?M6WYi@+#l?6!v+fiJ~&1b-M)q=L%;1Y>PxhEXEed6?3 z6brq(-?XNpztoR$JJWfHsYj|LR;$}bFh5Sw7t0$3-P8x*QCi`*Se)WpjcQY)ap7_c zNyo$+rG9=_Ehelg32aR?-w0% zT8qKLj4x6C*vMl67bYt!9)G6X&f1FW;~TMONGkF36Y0Ei*@Cj4bCi1f>f*Nx>0;Xk zdF-mC_ft!)G*<_7asDQQB4ON{*qnuo@d+&xcit!+#s5Ie@g&s4%`A-*-LC@f&L<+{$R6Xpu6K|taaeV_<3)$6p6l}*aCSSv8LrUd28)FnmXnmK?s>1 zT6$&8F0egv8l%w>)wVKLgCb>}HN#%FZLO$~kk5O>ZI|M?POF&fO~ti9q`Mt`l4l$g zYOHy{0p{w9)vE($`ELjE&BZaBlGq(xQ3v2}7?*@%!e z(q-?8<-8M64SHFb%^&$1M8A1~(l$px14)gDgX>fgIo$YH$!_FdBu{D;FEy8!uueX5 zZhk&jwM5aI^;O7NlKZ@(>ofP>C&Cd+;f`rNl-VM$4CLs6f}%N!sbF<_ zN|VaF=^Gu8;(N;pnbHE1x1Qb*-Yl7VM_l3ZRa_sI5k@?E@(T0GRw2GO2goI$=v7H- zYSz~w;MG{=!FKXwSF^lCOgKLJxnN%tFV*#tX(h+}QTNk>P3bF7d6b+5>?t~_rkPyI z{I?HW5CD*1k?MH_WWoxM?Wxr{J+>gP?!7ua)2I2cNW;QeW>_RN zfxiF!%KE5>oiD~2*Zk_iwdY@IwL2>nsCzM}bQ#p5%;V){#0#^V>Qw`}k?D@(OiB)9 z$ticJ8&n0cteaS4IV}n+9x4j1dP$l~Oe9YhUaA_cZ}mM#gXb#GK6 zVlF`u$k6FL`8=wO$l>o8%NomoX3me2zdcz2f8Jl&?ueazl?VwYA;n_aecYIR>^v>? 
zNnvWbo9~uqO#W9wA>S&v%Ooha)+3^+5=cN2!_50@Uhf!$6hgSciR@TXRI)WdCNp4@ za~jM5>pVTCAE_8jGz5c>oN{-$o%MOJV)qw_{?F@5hfAX;sN;m zo_Cq&3Y8SHzq;hY&7%AqB32zcdcO;LHbG?@4Ji6 zXF-N%;IHh9Ac$yO$dDX+raQ?=~!Tv?zx#{h<}|rsARE2E1~ZvB|j?DUn?OM zA^F#rY1d*^!`6+%npyawf8?B+mH;*fJOZ8~PCiC6#EgheSapnSI{Vm=pnq-`&^2v!pFhs&6WR$y41{o8a1s8Xo?Yce% z*ywgLW|%W*OyL60b;d7vs}Bk5dqdp8=95`T(QH}9mF8@$$V-Lg(rl~2opnG~pu-vf8hE{MGW=$}3NB@mwA8vqcG71| z?f2x_+D4izJhB^@>dB9$>R+3J5&96#A28U2idD>oZ1Ca*){(da3|cUtPSCcHHShUG zRu_~}pouk1;NAQ29p_#nMWfp%leM61)uM@HGh00h)JqdBkqo&9%JCs?+E^fTY^51) zvsIkjp7eto#bUni8h;S_K=x}7v#%&~S;*a|(157i@$~o3%aeu<2mL8fKx`(PcsF<& zc{90NKrLEO3N;UMcT$S8pWea$R(3V6%WzBy;4j+Pbga))L-}v|GAdcL!As^I4E|$w zJ)99S9Igh_R4}ah_U4&B$p{t?i%T-uFgglm4U;4Ae<+XP^1q{Ip~1M$vM&~*8;iWm zGU$>i8-Q)WDGLRzwOViJlA?t*cT&`^RD-j*l{()T4?r^FgMeOG@w`LqC{MF|JI0M- z>klW>%QOWRwfeU}ELN$D3L$j{^u7+jMltP`H4kcbOX4$abCe8%Mc-Y|2gXY{Z^1xp z-~F{B+e*cTw^eS>+Q)#0f&v0*zjc{VQk~#o#vy5Y_1!Uks8Tsis4MjKKV*<M1-g zehQqAys!ub1YZ}JZPo~4Z1m|8Tz>QF`D8~PeHkjfJ3n@#mTBR)?OiO1$WC^gxq=J4 z*Dqpx8paS!8ciX*5ZD>**+D}93cC%yE$cb7^Aj!Eg;%O^4kG619m;hU3h_~e1sc@` zf)GH-E45+<_gP_F#PB!y`E=tF9Z)TR;V8aL@bSA4s)M22kc4hX~XwjtV8 zzL_^biVHUzFLjem4Dfq**#_=er373f0V*WW$kikjLll5DSxWkAMDY6o28H807xW)p z_h$KCLCnoCDf72<1bKuF&s97wlX90C{GMW2=S08g&+X^-#*w`dMe5=^lQ!i`jezn^ zUx1}=;ruxsaVgdoBLJ8I#Egl*Aq+(VZHJP>dzo%BjFR++9eWG@F^1yKXezT2B%KYK ziP|T(vDolmG4t-PGI1XvZUbM^g(yCcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nto b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nto new file mode 100755 index 0000000000000000000000000000000000000000..91d3a927c3e718edb4b7b0774fed440a3c193069 GIT binary patch literal 84 
vcmXBF2?~H9002QdP16qZ|8H%;g<*RbO*XTzvau66NMue5m5ZB)m&V5r8>|4+ literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.fasta b/tests/test_data/outputs/locidex_db/blast/protein/protein.fasta new file mode 100755 index 0000000..38c6a95 --- /dev/null +++ b/tests/test_data/outputs/locidex_db/blast/protein/protein.fasta @@ -0,0 +1,40 @@ +>0 +mydppflealmitasffaifiiivvsvlllegd* +>1 +mklflttaaltatltsgmgfasdtvipwatnsggtesthiaamgedvnaqhqhiihthegvcaansgtiqadeaaltsnkppvqvqpellphqg* +>2 +maknrsrrlrkkmhidefqelgfsvawrfpegtseeqidktvddfindviepnklafdgsgylaweglicmqeigkcteehqaivrkwlearnleevrtselfdvwwd* +>3 +lklipfyllalfsassgateinackdligtwktpadnppytvtilppveacgekcvklnveyeldvthrnalycherqegvkgqgpmviafegaygghaigtynrqlqllragvipknkkwkwitkmenywfrrvkah* +>4 +mkkfdschpvflligcaqvplrssvskpvqqpsaqkeqlanangidecqslpyvpsdlaknkslsnqiadntasknsaissrifcekykqtkeqaltffqelpqymrskeveeqhmtefkkvllepgsknlsiyqtllaaherlqal* +>5 +mrikpddnwrwyydeehdrmmldlangmlfrsrfsrkmltpdafcptgfcvddaalyfsfeekcrdfeltkeqraelvlnalvairylkpqmpkswhfvahgemwtpgtgdaasvwlsdtaeqvnllvvepgenaalcllaqpgvviagrtmqlgdaikimndrlkpqvhchsfsleqav* +>6 +vlaftlrfiknkryfailagalviiagldsqhacsgnglpqingkalaalakqhpvvvlfrhaercdrsdntclpdstgitvngaqdaralgkafsadiqnynlyssntvraiqsatwfsagrsrravkkmmdcgsgiyssintllknsqiknivilthnhcltyivknkrgvkfdpeylnalvmyaengkllldgefvpg* +>7 +mgsnyivieglegagkttardvvvetleqlgirnmiftrepggtqlaeklrslvldirsvgdevitdkaevlmfyaarvqlvetvikpalaqgvwvigdrhdlstqayqgggrgidqtmlatlrdavlgdfrpdltlyldvtpevglkrarargdldrieqesfdffnrtrarylelaaqdsrirtidatqpldavmrdiratvtkwvqeqaa* +>8 +mkhikksvlvvlltshvahasivvggtrlvfdgnndessinvenkdskanlvqswlsvadpqvtnkqvfiitpplfrldagqknsirvirsgaplpadresmywlnikgipsiddnasanrveisintqikliyrppaltkstpdsqsqqlkwqtagdvitvnnptpyymnfasvtlnshevksatfvppkssasfklsstaaphgtvtwrlisdygmslephsgsf* +>9 
+mrillsnddgvhapgiqtlakalrefadvqvvapdrnrsgasnsltlesslrtftfdngdiavqmgtptdcvylgvnalmrprpdivvsginagpnlgddviysgtvaaamegrhlgfpalavslngyqhydtaaavtcallrglsreplrtgrilnvnvpdlplaqvkgirvtrcgsrhpadkvipqedprgntlywigppgdkydagpdtdfaavdegyvsvtplhvdltahsahdvvsdwldsvgvgtqw* +>10 +vnlvktpglhaagkginvanvlkdlgidvtvggflgkdnqdgfqqlfselgianrfqvvqgrtrinvkltekdgevtnfnfpgfdvtpadwerfvndflswlgqfdmvcvsgslpagvspeaftdwmtrlrsqcpciivdtsrvelvagliaapwlvkpnrreleiwagrklpemkdvidaapalreqgiahvvislgaegalrvnasgewiakppavdventvgaggsmvggwiygllmrestehtlrlatavaalavsqsnvgitdrpqlaammarvdlqpfn* +>11 +mnknkystpllmlatilagmlspmqsavngqlghwlqdgnacavisfasglvvmffiiiarketrqqfasiptlikkrkiplwnwfaglcgamvvfsegasasalgvatfqtalisalllsgllcdrfgigveekkyftpwritgalfaviatifvvspqwhstsfillailpflagllagwqpagnakvaeatgsmlvsitwnfivgfcvlgaalairialghvtiqlpdtwwmylggplgllsiglmailvrglgllmlgvastagqllgsvlidelipslgntvylvtiigtlfalvgaivttipeyraskmaqkmevse* +>12 +mkgrwakyvatgvmlamlaacsskptdrgqqykdgkftqpfslvnqpdavgapinagdfaeqvdqirsasprlytnqsnvynavqnwlrsggdtrtmrqfgidawqmegtdnygnvqftgyytpvvqarhtrqgafqypiysmppkrgrlpsraqiyagalsdkyilawsnslmdnfimdvqgsgyidfgdgsplnffsyagkngwpyrsigkvlidrgevkkedmsmqairewgekhseaevrelleqnpsfvffkpqsfapvkgasavpligrasvasdrsiippgttllaevplldnngkfsgqyelrlmvaldvggaikgqhfdiyqgigpdaghragwynhygrvwvlksapgagnvfsg* +>13 +lfdrydageqavlvhiyfsqdkdmedlqefeslvyyagveamqvitgsrkaphpkyfvgegkaveiaeavkatgaavvlfdhalspaqernlerlcecrvidrtglildifaqrarthegklqvelaqlrhlatrlvrgwthlerrkggigsrgpgetqleadrrllrnrivqiqsrlekvekqreqgrqsrikadvptvslvgytnagkstlfnqitearvyaadqlfatldptlrridvadvgetvladtvgfirhlpydlvaafkatlqetrqatqllhvvdaadvrvqenieavntvleeidahefptlmvmnkidmlddfepridrdeenkpirvwlsaqsgvgipqlfqalterlsgevaqhtlrllpqegrlrsrfyqlqaiekewmeedgsvspqvrmpivdwrrfckqepalieyvi* +>14 
+vaqrilvlgasgyigqhlvfalsqqghqvraaarrverlekhrlanvschkvdlhwpenlpallrdidtvyylvhgmgeggdfiaherqaalnvrdalrqtpvkqliflsslqapaheqsdhlrarqltadtlrdagvpvtelragiivgagsaafevmrdmvynlpiltpprwvrsrttpialenllyylvgllehpahehrileaagpqvlsyqqqferfmavsgkrrplipvpfptrwisvwflnvitsvppttakaliqglrhdlladdaalkklipqtlitfddavrrtlkeeeklvnssdwgydalafarwrpeygyfpkqagftaqtpaslsalwqvvnrlggkegyffgnilwqtraamdrlvghklakgrpshtllkpgdtvdswkviivepekqltllfgmkapglgrlsftlhdkgryreidvrawwhphgmpgliywllmipahlfifrgmarriarlaeqitek* +>15 +mnkfarhflplslrvrfllatagvvlvlslaygivalvgysvsfdkttfrllrgesnlfytlakwennkicvelpenldmqsptmtviydetgkllwtqrnipwlikstqpeglktngfheietnvdatstllsedhsaqeklkevreddddaemthsvavniypattrmpqltivvvdtipielkgsymvwswfvyvlaanlllvipllwiaawwslrpidalsrevreledhhremlnpettrklislvrnlnqllkseherynkyrtsltdlthslktplavlqstlrslrnekmsvskaepvmleqisrisqqigyylhrasmrgsgvllsrelhpvaplldnlisalnkvylrkgvnismdispeisfvgeqndfvevmgnvldnackyclefveisarqtddhlhifveddgpgiahskrslvfdrgqradtlrpgqgvglavareiteqyagqiiasdsllggarmevvsgrqhptqkee* +>16 +mtiqkrlleaveqkllrpidaqfaltvagnddpavtlaaallshdageghvclplsrltlteeahpllvawisetatpidwkkrllasaavscgdspaplilcgdrlylnrmwcnertvarffnevnqaiavdedqlsrildalfpptdevnwqkvaaavaltrrisvisggpgtgktttvakllaaliqmadgercrirlaaptgkaaarlteslgaalrqlpltdaqkkripedastlhrllgaqpgsqrlrhhagnplhldvlvvdeasmidlpmmsrlidalpphgrviflgdrdqlasveagavlgdicayvnagftaerarqlsrltgsaipagagtqaaslrdslcllqksyrfgsdsgigklaaaincgdrsaiqavfqqgfsdiekrtlqssddyagmldealagygrylrllhekaapeailqafneyqllcalregpfgvrglndrieqamvqqrkiqrhphsrwyegrpvmiarndsalglfngdigialdrgqglrvwfvmpdgtiksvqpsrlpehdttwamtvhksqgsefdhaalilpsqrspvvtrelvytavtrarrrlslyaderilagaivtrterrsglatlfdevsrig* +>17 
+mqevamssqeaskmlrtyniawwgnnyydvnelghisvcpdpdvpearvdlaklvkareaqgqrlpalfcfpqilqhrlrsinaafkraresygyngdyflvypikvnqhrrvieslihsgeplgleagskaelmavlahagmtrsvivcngykdreyirlaligekmghkvylviekmseiaivleeaerlnvvprlgvrarlasqgsgkwqssggekskfglaatqvlqlvetlrdagrldslqllhfhlgsqmanirdiatgvresarfyvelhklgvniqcfdvggglgvdyegtrsqsdcsvnyglneyanniiwaigdaceehglphptvitesgravtahhtvlvsniigverneytdptapaedapralqnlwetwqemhkpgtrrslrewlhdsqmdlhdihigyssgafslqerawaeqlylsmchevqkqldpqnrahrpiidelqermadkmyvnfslfqsmpdawgidqlfpvlplegldqvperravllditcdsdgaidhyidgdgiattmpmpeydpenppmlgffmvgayqeilgnmhnlfgdteavdvfvfpdgsvevelsdegdtvadmlqyvqldpktllthfrdqvkqtdlddalqqqfleefeaglygytylede* +>18 +mnslpqrstdfelttsqdgfalswqqrlilrhsaenpclwigagvadidmfrgnfsikdklnekialteatvselpdgwlvqfsrgatisatlrisadeagrltldlqnddlhhnriwlrlaanpddhiygcgeqfsyfdlrgkpfplwtseqgvgrnktsyvtwqadckenaggdyyltffpqptfvstqkyychvdnscymnfdfsapeyhelalwedkttlrfecadtyiallekltallgrqpelpdwvydgvtlgiqggtevcqqkldnmrnagvkvygiwaqdwsgirmtsfgkrvmwnwkwnsdnypqldsrikqwkeegvqflsyinpyvasdkdlcaeaarhgylakdatggdylvefgefyggvvdltnpeaydwfkdvikknmialgcsgwmadfgeylptdtylhngvsaelmhnawpalwakcnyealqktgklgeilffmragytgsqkystmmwagdqnvdwslddglasvvpaalslamtghglhhsdiggyttlfdmkrskelllrwcdfsaftpmmrthegnrpsndwqfdgdaetiahfarmttvfttlkpylkqavaqnaatglpvmrplflhyendaatytlkyqyllgqdllvapvheqgrcdwtlylpedhwvniwtgeahhggeisvdapigkppvfyraksewallfaslrni* +>19 
+mkhlrvvacmimlalagcdnndktapttkseapavaqpspaqdpsqlqklaqqsqgkaltlldaseaqldgaatlvltfsipldpeqdfsrvvhvvdkksgsvdgawelapnlkelrlrhlepervlvvtvdpavkalnnatfgksyektittrdvqpsvgfasrgsllpgkiaeglpvmalnvnhvdvnffrvkpgslasfvsqweyrsslsnwesdnllkmadlvytgrfdlnparntreklllplsdikplqqagvyvavmnqaghynysnaatlftlsdigvsahryhsrldiftqslengaaqsgieivllndkgqtlaqatsdaqghvqleadkaaalllarkegqttlldltlpaldlsefnvagapgyskqffmfgprdlyrpgetvilngllrdsdgktlpdqpvklevvkpdgqvmrtvvsqpenglyrlnypldinaptglwhvrantgdnllrswdfhvedfmpermalnltaqktplapadevkfsvvgyylygapangntlqgqlflrplrdavaalpgfqfgniaeenlsrsldevqltldkggrgevsaasqwqeahsplqvilqasllesggrpvtrrveqaiwpadtlpgirpqfaakavydyrtdttvnqpivdedsnaafdivyanaqgekkavsglqvrlirerrdyywnwsesegwqsqfdqkdlvegeqtldlnadetgkvsfpvewgayrlevkapnetvssvrfwagyswqdnsdgsgaarpdrvtlkldkanyrpgdtmklhiaapvagkgyamvessdgplwwqaidvpaqgleltipvdktwnrhdlylstlvvrpgdksrsatpkravgllhlplgddnrrldlalespakmrpnqpltvrvkasvkhgempkqinvlvsavdsgvlnitdyatpdpwqaffgqkrygadiydiygqviegqgrlaalrfggdgddltrggkppvnhaniiaqqaqpitlneqgegvvtlpigdfngelrvmaqawtaddfgrgeskvvvaapviaelnmprflaggdvsrlvldvtnltdrpqtlnialaasgllellsqqpqpvnlapgvrttlfvpvralegfgegeiqatisglnlpgetlgaqhkqwqigvrpawpaqtvnsgialapgeswhvpeqhlanvspatlqgqlllsgkpplnlaryirelkaypygcleqttsglfpalytnaaqlqslgitgdsdekrraavdigisrilqmqrdnggfalwdengaeepwltayamdflirageqgysvppeainrgnerllrylqdpgtmlirysdntqastfaaqayaalvlarqqkaplgalreiwerrsqaasglplmqlgialntmgdarrgeeaitlalntprqderqwiadygsslrdnalmlslleennlrpdaqnallsslseqafgqrwlstqennalflaahsrqasagawqaqtsleaqplsgdkaltrnldadqlaalevtntgsqplwlrldssgypssapepasnvlqierqilgtdgqrkslsslrsgelvlvwltvvadrnvpdalvvdllpaglelenqnladssaslpesgsevqnllnqmqqadiqymefrddrfvaavvvnegqpvtlvylaravtpgtyqlaqpqvesmyapqwratgasegllivtp* diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pdb b/tests/test_data/outputs/locidex_db/blast/protein/protein.pdb new file mode 100755 index 0000000000000000000000000000000000000000..d8c528067647b6400a5fdf8d0313c7a2cad70be2 GIT binary patch literal 20480 
zcmeI%K}y3w6adg~6q;Q`dIxdm5xk0v77Ch(i0fXLc8LzHSR`qJ}w(a(SGJN_5SK!5-N0t5&UAV7cs0RjXF z=U(BT*bOt>;5yXkpKY#1PBlyK!5-N0t5)0q`;)BI<@{! w&&|thQOEv|fUf`FR2%*ONxoY}6Cgl<009C72oNAZfB=E9K)0X?8GE&u=k literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.phr b/tests/test_data/outputs/locidex_db/blast/protein/protein.phr new file mode 100755 index 0000000000000000000000000000000000000000..5c45aa957a4f0d1cd7818e3a55a2b3d1e2230b5f GIT binary patch literal 1290 zcmajdF%E(-07l`qf(U}5Zl0n=5M50d6B83J(5v()=;$fcK!PObV=7I)*X7q<`^Imj z7UKRsKHJ~;baB_4b!YpzZoHD>`+M{ky7LD7ATZ(!@Cx4FfuDdc!3W?g@FDm;_$l}r zd<6ageg^&sKK}2Op#dfopasq;Kod+UKpV^`KqFjGfL6Gq0L?I`0PS!^0UF|(0<^@< GkH8C-tr(mD literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pin b/tests/test_data/outputs/locidex_db/blast/protein/protein.pin new file mode 100755 index 0000000000000000000000000000000000000000..d82020df7a61c1140cdf102496cf9ef7e1c9a30b GIT binary patch literal 280 zcmXw!%L)Nu7>3_5PLU+aO8rSznh}z$MixqmJJ5`wCS%BJ@#8meLIv$;B%j^};L`CDbcq)uh{S5PLV!7$le zIHH^rg?ynTM5S6PidyU25lF04t^@c-KTo6GlB+vf(r6h=j6EP4*^(O|Ga=W>j@$vS zpESsxJh*-cZj%fxHFz=H@Nb;3E+45IQrF@9HV|-t-~|Y!dBhWloPp>Gh`qV$ikC2- GI(z`nvn#>? 
literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pjs b/tests/test_data/outputs/locidex_db/blast/protein/protein.pjs new file mode 100755 index 0000000..fd87ea1 --- /dev/null +++ b/tests/test_data/outputs/locidex_db/blast/protein/protein.pjs @@ -0,0 +1,22 @@ +{ + "version": "1.2", + "dbname": "protein", + "dbtype": "Protein", + "db-version": 5, + "description": "locidex_db/blast/protein/protein.fasta", + "number-of-letters": 7547, + "number-of-sequences": 20, + "last-updated": "2024-06-06T08:43:00", + "number-of-volumes": 1, + "bytes-total": 46334, + "bytes-to-cache": 7848, + "files": [ + "protein.pdb", + "protein.phr", + "protein.pin", + "protein.pot", + "protein.psq", + "protein.ptf", + "protein.pto" + ] +} diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pot b/tests/test_data/outputs/locidex_db/blast/protein/protein.pot new file mode 100755 index 0000000000000000000000000000000000000000..754c9ab7fa1e8f21ae1d5ba10426f2e2f8bdb1c3 GIT binary patch literal 248 zcmb`&w*i14001yMq5>BFzm6_3`Gk)z<}97Nd5mm+Q*q0jk>Fn7qGAI;{4U;dyg8N*1X+{(dC$PK4%?O z4dVa()uK`Nc22k3x|dXmZuxz3ziziBvcuXbJ69$6)7(fS<(zg3az5vH#m>X|U{6@B z^nS(}J*f6Hgy*}`=9*JyDK%vu;s`2n9d~E!LG}t^WMB62KIZ064!sq1(aVyj?gr8P z)e*9kZ-_%6E#k6JAh%cACxU`%;*L8x|ST>&Y$#aZe;Up7YuqI#j0<%2Zk@x7@r-4ei}H z=P52(WlQwyOZ`OU(?2&(*SXeNg&K{%+$dqFYpw}(`Nq=zPG4RrSy{T;$efnGm9|`7 zYv%Zc^2*q}k7`FNWgoB3Rl=c+sqPRi~%E{D>}+;u1~&L!98r#CT@5)o$`8+;`3;) z>07JqGhH6b?aR-_pz99Mt^Na$nOEdSNomR8H2GxRsijZ%%g0U!WTNlk{Yq1x3!I2q z^Ah6eHV?8IFU`OjL`h-vrstqxb61r-Tg`D#$|M_bgaTwJw9j)L@!{m{iwj3i&m2Zr(Z|^4 z=4Vd>#T*{!&7Hm5qbm=oljAsMsN6H?kTbBSTne;8$>D(n3fzKdxW&}NFt_G|)$W8= z{3@>L84G9}=jrxtWgMt!_fE(73>j27&%?FFb!BX4SoK|Pxk&CSgJHK^niy!vFKV5VNb%MH`k1O5+{kYvX7hAUNOVF zNMGc1{7LSh#?6)Eg4^6)&&vC@n2%9E)#BE9We6 zjPU?ZiO>+zaN0nncvQFP0mn#;9}tFHed^oIdi5G}*DlBr@f)F;gF<M2xZ^v$Yd9|h0SwZ*u2X=!r% zn^Xuv&YT{)DZ$rOaWFGnd`DUVDhIVtf9orr7?D1p(HUVnjbF$@fv73^l{0g@P0jUP=^>O^5ZJlq&nbG_@qFO9f%zXC@16#Zm>Im&6(KxM1{H=!MxejFMLqAhoLkh 
zVbNh5ra8x=gYk7kVK`?BG3D;WTV@f?y{2Fj(7yohPpGcVd3u&{dsuAMJ~ae={_o_@Zf31BNJ>2 z%nl-FeAq~rLkYKr0C;Rn&#WDSyK}~`i~xP*@J@z)sUDNYOd8tgGP+8&$B@LEkOhn< zp9HKRtafmo1#U{WhX#>SM4!3zV-6E0KQIpwQpDymuMCSd`>Y8s#UkQ*9^{1_{1xJP#Su2@4{ z{x%DG_n3`Phq_=6NnToZlbAvdy8-si?H?47L@|Ts9~bIQP|#`lb+MA~pO1Pe>Pi8)OlNG&s~FYs0cnP+tp}b(`xsrwqA_!21>pD) zQ8Jxwj7VA>5=(P*a5{|eY@t+3P%d=jf!e_Vj6jCK6{cHt!o^rnK1~S#SVxHRCzP!q zLC_2LSk-521tJU?41C7{zfdACB`Us8@#_2*&~Fd=69P&ppd{vH74zfJrod%35S@ew zwlN6we1`#F4itU)g$9b72phl4f+euRE9l)92p zGs;VD;YNUs0PGTXB0c97NbHpM;ugRPW5~>lhkltMOalNWh0J=t7ADW0xcY!-I;^M< z5F@3-X=`M*2o~)QTmTQ@AV72BQ4o1T99ZMs9F!yFQ62Veegp#kPCzV1Je z-5xzJ6Lt5P0op<#EbFZS>!rjc@tLR<_J_5>su|l_EB7;)bmj%kz)C`?c&vr^$S=3# z&pm9Ljc@o7l2ruuVWwaq@(vR{sXEDWl=Uz~^Z+qogq_RH3c`yKI@~D>2oLW<7XzN~ z^b96vP2vQZ1SSYT;AU9Q3=dK_m|8L2Gf0nOZik#oSi+c%fma(C?X<|$Fgw6xu$woh zh8AJ{!4268oG{l+N$sAU31NvH0N#QOzV~&^YAOpW5llrXHLhdKg)1QqqQh8Fi)6^* z(#O7q=5cRq)I?vfH?Rfzi;J4sV&*_^>RN*WljkQpi_9<*!-)N-Va*dQ{<`nzr)i~a z9dOZrm>fx?P>lFR615H*i2ES4M@x!fL8RR3MGn<-0Z|F%$siz;lL#avTU3D&#Tr5g zG#u{^p3X;xDFnG^;KAB3%P_1tLwZpcW+I!&(q35qVB)B>QKVHxh;r<6c+V=4%c9b- zf~DV|CXpFYEZP={Obe8@jNX}vFg z7E3^YTH~?y=a&Z;KH*3)q|{*7UXeex60E+k=rIy<*ban@F|P>$LDyg-e{i5+laZrZ zi%^K%IXupI>=}G72*gg2vBV@m&uD|1degE<#}!;`HU{1}4tLx^FY%If(Z*D=RV3Xp zEC73=F`VT?A(&h~0waYaa>HCatGG}Y7*2$*L(RMp?*@A*mU)me{pSA+D|(Buqu`#p zCLAIzA6zunTNqhkvB#TU^r&RPP#}N{*LY;MvezTwv$gyR%P5=>>cKj#kIp&(bu5ns z;PGvJwlDB3f~a4@u+m%8URF`zgZ&@p#BgDaptxyU&+hMe><2d4m-WjI=Npi`ZFv`Xn zDKOR?rCXx$urbvkv#?mQB_qRtyup_2_Ba_XZG&I-V?7LR%u_8xuoGjRc*O-hYjV^3AD&E{3*TmmX%#`vDQiulIBC! 
zEV^yh)9f43yb=l%unhVnu*jlIO5UfE`{o zrXiVHh{tD9YIHS`^-0C}1NwtHnDWE-m^zo4DZC+ukv$O$Hj?u5xLSnXwid*@;H}i; zbV4fMsUT~-1`#L8DcgZLp(I#g1-3p2AJZ}z{}UiJBfZ9D7*wJF-(g(HoZJVBdZUnh z!!RSm`k-4V%)Uo~GgwVRN-ztK>u<~^JY`$|l%UWk$K0_G8NoPGzz ztb{Cep;G|;*xL)G!G7WhEZ7mFZF$KGj>8f8n3%%ka#vifeIK$;v9ATlUu*%9Pt?YI zdSjLLc#Q9SDDewpOlR0$@E=`gOU`sd>JTnm$0y)T$!bh!9Hb3=ib@d_96v6WlGHlcj#x<0I@s zJb`^Xt=HmSN4^L*2qzAb*|F6IM8gQOd&J^}F^iN{ho%d@iged^f-sIQH!bK;D3rFAgX;#*Kv}km61_n9Pb6qx36F zSNpn#JGEWm@JVfXEKI-;`W#r(LG)vHZu@T{6>CA85b`Nw22=18+b^KTj*d_EC7UG+ zKcyi|6$=Tzcws|F@!%Z#OND?fttZ#oMu+3s2HB(uxI(BNL-jgK{La3Ft)Gs}A+R?a z?wIyn8YjVcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pto b/tests/test_data/outputs/locidex_db/blast/protein/protein.pto new file mode 100755 index 0000000000000000000000000000000000000000..91d3a927c3e718edb4b7b0774fed440a3c193069 GIT binary patch literal 84 vcmXBF2?~H9002QdP16qZ|8H%;g<*RbO*XTzvau66NMue5m5ZB)m&V5r8>|4+ literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/locidex_db/config.json b/tests/test_data/outputs/locidex_db/config.json new file mode 100755 index 0000000..33bc44e --- /dev/null +++ b/tests/test_data/outputs/locidex_db/config.json @@ -0,0 +1,12 @@ +{ + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" +} \ No newline at end of file diff --git a/tests/test_data/outputs/locidex_db/meta.json b/tests/test_data/outputs/locidex_db/meta.json new file mode 100755 index 0000000..b0c01b1 --- 
/dev/null +++ b/tests/test_data/outputs/locidex_db/meta.json @@ -0,0 +1,455 @@ +{ + "info": { + "num_seqs": 20, + "is_cds": "True", + "trans_table": 11, + "dna_min_len": 71.4, + "dna_max_len": 3454.5, + "dna_min_ident": 80, + "aa_min_len": 23.8, + "aa_max_len": 1151.5, + "aa_min_ident": 64 + }, + "meta": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + 
"dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, 
+ "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": 
"b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + 
"min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + } +} \ No newline at end of file diff --git 
a/tests/test_data/outputs/locidex_db/results.json b/tests/test_data/outputs/locidex_db/results.json new file mode 100755 index 0000000..509bbcc --- /dev/null +++ b/tests/test_data/outputs/locidex_db/results.json @@ -0,0 +1,15 @@ +{ + "analysis_start_time": "2024-06-06 08:43:01", + "parameters": { + "input_file": "locidex.loci.txt", + "outdir": "locidex_db", + "name": "test", + "author": "James", + "date": "", + "db_ver": "1.0.0", + "db_desc": "desc", + "force": false + }, + "result_file": "locidex_db", + "analysis_end_time": "2024-06-06 08:43:03" +} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/conservative/profile.tsv b/tests/test_data/outputs/merge/conservative/profile.tsv new file mode 100755 index 0000000..6082f0b --- /dev/null +++ b/tests/test_data/outputs/merge/conservative/profile.tsv @@ -0,0 +1,15 @@ +sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 +G10 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 - eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G12 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 
5b128d659955716833ce42f2bb060212 - 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G13 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae - 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G5 e9e707ebc64e10a881f1323ebff85369 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G7 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 796419469778f7ec3851c813f59cfff7 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 
8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G8 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fd6284b58a891cf02058906c9ee37a00 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G1 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G2 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 
8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G6 a47cc24760462371e919143c5cc81376 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G9 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 - c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G14 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae - b9060019038526aa6fc38d2f7510edc6 
bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G4 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G3 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G11 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f - 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d 
a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json b/tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json new file mode 100755 index 0000000..476c71a --- /dev/null +++ b/tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json @@ -0,0 +1,78 @@ +{ + "locus_1": { + "a47cc24760462371e919143c5cc81376": 1, + "d17b02d12afa7f832ee37df6f24a8f55": 2, + "e9e707ebc64e10a881f1323ebff85369": 3 + }, + "locus_2": { + "8b70e777f6bbf2c91ff75947824b5976": 1, + "e35184c8ff18e9116fc8faef20532f56": 2 + }, + "locus_3": { + "670705cd2a59c4a23a897ac656a888fe": 1 + }, + "locus_4": { + "73790840c76943caac0ebb3b2b3f0b98": 1, + "ac1b21798c0f672ad26f5a91ea278590": 2 + }, + "locus_5": { + "8cf4341689dd00f74adfcc43d1f4a35e": 1, + "d00defcca8588f21ce16fa1d0ac13389": 2 + }, + "locus_6": { + "a11561f2804e2c32c78049f8b9aeb517": 1 + }, + "locus_7": { + "49d9878c9d3071aa1d2f26cb947b784c": 1, + "dc94bf1ec4ff9bed2a1f460cbd958656": 2 + }, + "locus_8": { + "7ebe74afecf146ec4db816c8deced64f": 1 + }, + "locus_9": { + "41ebb36872854b2b33c8c028e23d8ad1": 1 + }, + "locus_10": { + "0": 0, + "796419469778f7ec3851c813f59cfff7": 1, + "fd6284b58a891cf02058906c9ee37a00": 2, + "fe04d17ec353c08b903c85fc0ca4dc02": 3 + }, + "locus_11": { + "0": 0, + "5b128d659955716833ce42f2bb060212": 1, + "c4266f2f24fdd8e039113c6b0955af9f": 2 + }, + "locus_12": { + "0": 0, + "eb72da68c159497d5f0c8eeddc51b5ae": 1 + }, + "locus_13": { + "0": 0, + "8f300259dcb46224bdc1fe5273107324": 1 + }, + "locus_14": { + "2fa0b06ed72e36b4071cab9d0b4f87d0": 1, + "b9060019038526aa6fc38d2f7510edc6": 2 + }, + "locus_15": { + "bc98c2fe196a68a79036814396513a8d": 1 + }, + "locus_16": { + "16e55766c603fe33c9e75d8e81743ae2": 1, + "a9b3cb97dac3cda6e932a49bf9a507bd": 2 + }, + "locus_17": { + "a0d97d985483413f3c18bfe5833ae9ce": 1 + 
}, + "locus_18": { + "b3021e979faa7600756c06dfadfcf14c": 1 + }, + "locus_19": { + "a012eee23637b48e39b00808a057e35d": 1, + "de32372598811d63bcc1a0eaf6872644": 2 + }, + "locus_20": { + "4461918e985715e4a2b07494e1f91326": 1 + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text b/tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text new file mode 100755 index 0000000..19baea7 --- /dev/null +++ b/tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text @@ -0,0 +1,15 @@ +sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 +G10 2 2 1 2 2 1 2 1 1 3 0 1 1 2 1 1 1 1 1 1 +G12 2 2 1 2 2 1 2 1 1 3 1 0 1 2 1 1 1 1 1 1 +G13 2 1 1 1 1 1 1 1 1 3 2 1 0 1 1 2 1 1 2 1 +G5 3 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 +G7 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 +G8 2 2 1 2 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 +G1 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 +G2 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 +G6 1 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 +G9 2 1 1 1 1 1 1 1 1 0 2 1 1 1 1 2 1 1 2 1 +G14 2 2 1 2 2 1 2 1 1 3 1 1 0 2 1 1 1 1 1 1 +G4 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 +G3 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 +G11 2 1 1 1 1 1 1 1 1 3 2 0 1 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text b/tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text new file mode 100755 index 0000000..19baea7 --- /dev/null +++ b/tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text @@ -0,0 +1,15 @@ +sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 +G10 2 2 1 2 2 1 2 1 1 3 0 1 1 2 1 1 1 1 1 1 +G12 2 2 1 2 2 1 2 1 1 3 1 0 1 2 1 1 1 1 1 1 +G13 2 1 1 1 1 1 1 1 1 3 
2 1 0 1 1 2 1 1 2 1 +G5 3 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 +G7 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 +G8 2 2 1 2 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 +G1 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 +G2 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 +G6 1 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 +G9 2 1 1 1 1 1 1 1 1 0 2 1 1 1 1 2 1 1 2 1 +G14 2 2 1 2 2 1 2 1 1 3 1 1 0 2 1 1 1 1 1 1 +G4 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 +G3 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 +G11 2 1 1 1 1 1 1 1 1 3 2 0 1 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/results.text b/tests/test_data/outputs/merge/conservative/profile_dists/results.text new file mode 100755 index 0000000..b27a212 --- /dev/null +++ b/tests/test_data/outputs/merge/conservative/profile_dists/results.text @@ -0,0 +1,15 @@ +dists G10 G12 G13 G5 G7 G8 G1 G2 G6 G9 G14 G4 G3 G11 +G10 0 0 7 8 8 1 7 0 1 7 0 0 7 7 +G12 0 0 8 9 9 1 8 0 1 8 0 0 8 8 +G13 7 8 0 1 1 9 0 8 9 0 8 8 0 0 +G5 8 9 1 0 2 10 1 9 9 1 9 9 1 1 +G7 8 9 1 2 0 9 1 9 10 0 9 9 1 1 +G8 1 1 9 10 9 0 9 1 2 8 1 1 9 9 +G1 7 8 0 1 1 9 0 8 9 0 8 8 0 0 +G2 0 0 8 9 9 1 8 0 1 8 0 0 8 8 +G6 1 1 9 9 10 2 9 1 0 9 1 1 9 9 +G9 7 8 0 1 0 8 0 8 9 0 8 8 0 0 +G14 0 0 8 9 9 1 8 0 1 8 0 0 8 8 +G4 0 0 8 9 9 1 8 0 1 8 0 0 8 8 +G3 7 8 0 1 1 9 0 8 9 0 8 8 0 0 +G11 7 8 0 1 1 9 0 8 9 0 8 8 0 0 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/run.json b/tests/test_data/outputs/merge/conservative/profile_dists/run.json new file mode 100755 index 0000000..d548f75 --- /dev/null +++ b/tests/test_data/outputs/merge/conservative/profile_dists/run.json @@ -0,0 +1,38 @@ +{ + "profile_dists": "version: 1.0.0", + "analysis_start_time": "10/06/2024 11:14:41", + "analysis_end_time": "10/06/2024 11:14:48", + "parameters": { + "query": "locidex/merge/conservative/profile.tsv", + "ref": "locidex/merge/conservative/profile.tsv", + "outdir": "locidex/merge/conservative/profile_dists", + "outfmt": "matrix", + "file_type": "text", + "distm": "hamming", + 
"missing_thresh": 1.0, + "sample_qual_thresh": 1.0, + "match_threshold": -1, + "mapping_file": null, + "batch_size": null, + "max_mem": null, + "force": false, + "skip": false, + "columns": null, + "count_missing": false, + "cpus": 1 + }, + "query_profile_info": { + "num_samples": 14, + "num_samples_pass": 14, + "failed_samples": [], + "parsed_file_path": "locidex/merge/conservative/profile_dists/query_profile.text" + }, + "ref_profile_info": { + "num_samples": 14, + "num_samples_pass": 14, + "failed_samples": [], + "parsed_file_path": "locidex/merge/conservative/profile_dists/ref_profile.text" + }, + "loci_removed": [], + "result_file": "locidex/merge/conservative/profile_dists/results.text" +} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/normal/profile.tsv b/tests/test_data/outputs/merge/normal/profile.tsv new file mode 100755 index 0000000..245ca6f --- /dev/null +++ b/tests/test_data/outputs/merge/normal/profile.tsv @@ -0,0 +1,15 @@ +sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 +G10 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 - eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G12 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 
dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 60934464690fea26102d1c8c9acb755d 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G13 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 70e88b95c11c37150f37312882af5771 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G5 e9e707ebc64e10a881f1323ebff85369 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G7 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 
49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 796419469778f7ec3851c813f59cfff7 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G8 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fd6284b58a891cf02058906c9ee37a00 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G1 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G2 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 
dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G6 a47cc24760462371e919143c5cc81376 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G9 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 - c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G14 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 
7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 70e88b95c11c37150f37312882af5771 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G4 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 +G3 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 +G11 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 
7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f 60934464690fea26102d1c8c9acb755d 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/allele_map.json b/tests/test_data/outputs/merge/normal/profile_dists/allele_map.json new file mode 100755 index 0000000..2d1feaa --- /dev/null +++ b/tests/test_data/outputs/merge/normal/profile_dists/allele_map.json @@ -0,0 +1,78 @@ +{ + "locus_1": { + "a47cc24760462371e919143c5cc81376": 1, + "d17b02d12afa7f832ee37df6f24a8f55": 2, + "e9e707ebc64e10a881f1323ebff85369": 3 + }, + "locus_2": { + "8b70e777f6bbf2c91ff75947824b5976": 1, + "e35184c8ff18e9116fc8faef20532f56": 2 + }, + "locus_3": { + "670705cd2a59c4a23a897ac656a888fe": 1 + }, + "locus_4": { + "73790840c76943caac0ebb3b2b3f0b98": 1, + "ac1b21798c0f672ad26f5a91ea278590": 2 + }, + "locus_5": { + "8cf4341689dd00f74adfcc43d1f4a35e": 1, + "d00defcca8588f21ce16fa1d0ac13389": 2 + }, + "locus_6": { + "a11561f2804e2c32c78049f8b9aeb517": 1 + }, + "locus_7": { + "49d9878c9d3071aa1d2f26cb947b784c": 1, + "dc94bf1ec4ff9bed2a1f460cbd958656": 2 + }, + "locus_8": { + "7ebe74afecf146ec4db816c8deced64f": 1 + }, + "locus_9": { + "41ebb36872854b2b33c8c028e23d8ad1": 1 + }, + "locus_10": { + "0": 0, + "796419469778f7ec3851c813f59cfff7": 1, + "fd6284b58a891cf02058906c9ee37a00": 2, + "fe04d17ec353c08b903c85fc0ca4dc02": 3 + }, + "locus_11": { + "0": 0, + "5b128d659955716833ce42f2bb060212": 1, + "c4266f2f24fdd8e039113c6b0955af9f": 2 + }, + "locus_12": { + "60934464690fea26102d1c8c9acb755d": 1, + "eb72da68c159497d5f0c8eeddc51b5ae": 2 + }, + "locus_13": { + "70e88b95c11c37150f37312882af5771": 1, + "8f300259dcb46224bdc1fe5273107324": 2 + }, + "locus_14": { 
+ "2fa0b06ed72e36b4071cab9d0b4f87d0": 1, + "b9060019038526aa6fc38d2f7510edc6": 2 + }, + "locus_15": { + "bc98c2fe196a68a79036814396513a8d": 1 + }, + "locus_16": { + "16e55766c603fe33c9e75d8e81743ae2": 1, + "a9b3cb97dac3cda6e932a49bf9a507bd": 2 + }, + "locus_17": { + "a0d97d985483413f3c18bfe5833ae9ce": 1 + }, + "locus_18": { + "b3021e979faa7600756c06dfadfcf14c": 1 + }, + "locus_19": { + "a012eee23637b48e39b00808a057e35d": 1, + "de32372598811d63bcc1a0eaf6872644": 2 + }, + "locus_20": { + "4461918e985715e4a2b07494e1f91326": 1 + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/normal/profile_dists/query_profile.text b/tests/test_data/outputs/merge/normal/profile_dists/query_profile.text new file mode 100755 index 0000000..27c9f25 --- /dev/null +++ b/tests/test_data/outputs/merge/normal/profile_dists/query_profile.text @@ -0,0 +1,15 @@ +sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 +G10 2 2 1 2 2 1 2 1 1 3 0 2 2 2 1 1 1 1 1 1 +G12 2 2 1 2 2 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 +G13 2 1 1 1 1 1 1 1 1 3 2 2 1 1 1 2 1 1 2 1 +G5 3 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 +G7 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 1 2 1 +G8 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 1 1 1 1 +G1 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 +G2 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 +G6 1 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 +G9 2 1 1 1 1 1 1 1 1 0 2 2 2 1 1 2 1 1 2 1 +G14 2 2 1 2 2 1 2 1 1 3 1 2 1 2 1 1 1 1 1 1 +G4 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 +G3 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 +G11 2 1 1 1 1 1 1 1 1 3 2 1 2 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text b/tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text new file mode 100755 index 0000000..27c9f25 --- /dev/null +++ b/tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text @@ -0,0 +1,15 @@ +sample_id locus_1 locus_2 
locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 +G10 2 2 1 2 2 1 2 1 1 3 0 2 2 2 1 1 1 1 1 1 +G12 2 2 1 2 2 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 +G13 2 1 1 1 1 1 1 1 1 3 2 2 1 1 1 2 1 1 2 1 +G5 3 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 +G7 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 1 2 1 +G8 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 1 1 1 1 +G1 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 +G2 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 +G6 1 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 +G9 2 1 1 1 1 1 1 1 1 0 2 2 2 1 1 2 1 1 2 1 +G14 2 2 1 2 2 1 2 1 1 3 1 2 1 2 1 1 1 1 1 1 +G4 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 +G3 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 +G11 2 1 1 1 1 1 1 1 1 3 2 1 2 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/results.text b/tests/test_data/outputs/merge/normal/profile_dists/results.text new file mode 100755 index 0000000..474eec8 --- /dev/null +++ b/tests/test_data/outputs/merge/normal/profile_dists/results.text @@ -0,0 +1,15 @@ +dists G10 G12 G13 G5 G7 G8 G1 G2 G6 G9 G14 G4 G3 G11 +G10 0 1 8 8 8 1 7 0 1 7 1 0 7 8 +G12 1 0 10 10 10 2 9 1 2 9 2 1 9 8 +G13 8 10 0 2 2 10 1 9 10 1 8 9 1 2 +G5 8 10 2 0 2 10 1 9 9 1 10 9 1 2 +G7 8 10 2 2 0 9 1 9 10 0 10 9 1 2 +G8 1 2 10 10 9 0 9 1 2 8 2 1 9 10 +G1 7 9 1 1 1 9 0 8 9 0 9 8 0 1 +G2 0 1 9 9 9 1 8 0 1 8 1 0 8 9 +G6 1 2 10 9 10 2 9 1 0 9 2 1 9 10 +G9 7 9 1 1 0 8 0 8 9 0 9 8 0 1 +G14 1 2 8 10 10 2 9 1 2 9 0 1 9 10 +G4 0 1 9 9 9 1 8 0 1 8 1 0 8 9 +G3 7 9 1 1 1 9 0 8 9 0 9 8 0 1 +G11 8 8 2 2 2 10 1 9 10 1 10 9 1 0 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/run.json b/tests/test_data/outputs/merge/normal/profile_dists/run.json new file mode 100755 index 0000000..f64714f --- /dev/null +++ b/tests/test_data/outputs/merge/normal/profile_dists/run.json @@ -0,0 +1,38 @@ +{ + "profile_dists": "version: 1.0.0", + "analysis_start_time": "10/06/2024 11:14:29", + "analysis_end_time": "10/06/2024 
11:14:37", + "parameters": { + "query": "locidex/merge/normal/profile.tsv", + "ref": "locidex/merge/normal/profile.tsv", + "outdir": "locidex/merge/normal/profile_dists", + "outfmt": "matrix", + "file_type": "text", + "distm": "hamming", + "missing_thresh": 1.0, + "sample_qual_thresh": 1.0, + "match_threshold": -1, + "mapping_file": null, + "batch_size": null, + "max_mem": null, + "force": false, + "skip": false, + "columns": null, + "count_missing": false, + "cpus": 1 + }, + "query_profile_info": { + "num_samples": 14, + "num_samples_pass": 14, + "failed_samples": [], + "parsed_file_path": "locidex/merge/normal/profile_dists/query_profile.text" + }, + "ref_profile_info": { + "num_samples": 14, + "num_samples_pass": 14, + "failed_samples": [], + "parsed_file_path": "locidex/merge/normal/profile_dists/ref_profile.text" + }, + "loci_removed": [], + "result_file": "locidex/merge/normal/profile_dists/results.text" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G1/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G1/nucleotide.hits.txt new file mode 100755 index 0000000..9871163 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G1/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 
428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G1/profile.json 
b/tests/test_data/outputs/report/conservative/G1/profile.json new file mode 100755 index 0000000..3f5fdd7 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G1/profile.json @@ -0,0 +1,24 @@ +{ + "G1": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G1/protein.hits.txt b/tests/test_data/outputs/report/conservative/G1/protein.hits.txt new file mode 100755 index 0000000..c9b1c8a --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G1/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 
SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 
17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC 
domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G10/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G10/nucleotide.hits.txt new file mode 100755 index 0000000..fbba040 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G10/nucleotide.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_12:11:0:2 2 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? 
biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 
126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G10/profile.json b/tests/test_data/outputs/report/conservative/G10/profile.json new file mode 100755 index 0000000..3c394e9 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G10/profile.json @@ -0,0 +1,24 @@ +{ + "G10": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "-", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } 
+} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G10/protein.hits.txt b/tests/test_data/outputs/report/conservative/G10/protein.hits.txt new file mode 100755 index 0000000..36030d1 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G10/protein.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_12:11:0:2 2 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 
16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G11/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G11/nucleotide.hits.txt new file mode 100755 index 0000000..b2f81da --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G11/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 
b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? 
biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 
1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G11/profile.json b/tests/test_data/outputs/report/conservative/G11/profile.json new file mode 100755 index 0000000..d6cf3fd --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G11/profile.json @@ -0,0 +1,24 @@ +{ + "G11": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "-", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + 
"locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G11/protein.hits.txt b/tests/test_data/outputs/report/conservative/G11/protein.hits.txt new file mode 100755 index 0000000..33fcef0 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G11/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 
8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G12/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G12/nucleotide.hits.txt new file mode 100755 index 0000000..119f807 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G12/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 
b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G12/profile.json 
b/tests/test_data/outputs/report/conservative/G12/profile.json new file mode 100755 index 0000000..c07fbc4 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G12/profile.json @@ -0,0 +1,24 @@ +{ + "G12": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "-", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G12/protein.hits.txt b/tests/test_data/outputs/report/conservative/G12/protein.hits.txt new file mode 100755 index 0000000..ea3fc4d --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G12/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 0 
100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G13/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G13/nucleotide.hits.txt new file mode 100755 index 0000000..a9dead4 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G13/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 
b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? 
biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 
1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G13/profile.json b/tests/test_data/outputs/report/conservative/G13/profile.json new file mode 100755 index 0000000..e7435f9 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G13/profile.json @@ -0,0 +1,24 @@ +{ + "G13": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "-", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + 
"locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G13/protein.hits.txt b/tests/test_data/outputs/report/conservative/G13/protein.hits.txt new file mode 100755 index 0000000..e711712 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G13/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 
8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G14/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G14/nucleotide.hits.txt new file mode 100755 index 0000000..d0efc02 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G14/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 
b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G14/profile.json 
b/tests/test_data/outputs/report/conservative/G14/profile.json new file mode 100755 index 0000000..7295749 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G14/profile.json @@ -0,0 +1,24 @@ +{ + "G14": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "-", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G14/protein.hits.txt b/tests/test_data/outputs/report/conservative/G14/protein.hits.txt new file mode 100755 index 0000000..f4627bc --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G14/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 0 
100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G2/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G2/nucleotide.hits.txt new file mode 100755 index 0000000..ca5e559 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G2/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 
ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G2/profile.json 
b/tests/test_data/outputs/report/conservative/G2/profile.json new file mode 100755 index 0000000..3b8c03f --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G2/profile.json @@ -0,0 +1,24 @@ +{ + "G2": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G2/protein.hits.txt b/tests/test_data/outputs/report/conservative/G2/protein.hits.txt new file mode 100755 index 0000000..974fe9e --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G2/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 
SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 
638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC 
domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G3/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G3/nucleotide.hits.txt new file mode 100755 index 0000000..9871163 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G3/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 
428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G3/profile.json 
b/tests/test_data/outputs/report/conservative/G3/profile.json new file mode 100755 index 0000000..fbbe3af --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G3/profile.json @@ -0,0 +1,24 @@ +{ + "G3": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G3/protein.hits.txt b/tests/test_data/outputs/report/conservative/G3/protein.hits.txt new file mode 100755 index 0000000..c9b1c8a --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G3/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 
SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 
17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC 
domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G4/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G4/nucleotide.hits.txt new file mode 100755 index 0000000..ca5e559 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G4/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 
795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 
SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G4/profile.json b/tests/test_data/outputs/report/conservative/G4/profile.json new file mode 100755 index 0000000..f5ece9e --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G4/profile.json @@ -0,0 +1,24 @@ +{ + "G4": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": 
"a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G4/protein.hits.txt b/tests/test_data/outputs/report/conservative/G4/protein.hits.txt new file mode 100755 index 0000000..974fe9e --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G4/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 
256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G5/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G5/nucleotide.hits.txt new file mode 100755 index 0000000..1b3ef48 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G5/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 103 1 97.087 100 100 plus 1.81e-46 172 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 
ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G5/profile.json 
b/tests/test_data/outputs/report/conservative/G5/profile.json new file mode 100755 index 0000000..5afae24 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G5/profile.json @@ -0,0 +1,24 @@ +{ + "G5": { + "locus_1": "e9e707ebc64e10a881f1323ebff85369", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G5/protein.hits.txt b/tests/test_data/outputs/report/conservative/G5/protein.hits.txt new file mode 100755 index 0000000..8e13c2b --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G5/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 2 94.118 100 100 2.04e-17 57.8 0 locus_1 
SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 
17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC 
domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G6/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G6/nucleotide.hits.txt new file mode 100755 index 0000000..d721903 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G6/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 2 98.039 100 100 plus 3.8999999999999997e-48 178 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 
795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 
SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G6/profile.json b/tests/test_data/outputs/report/conservative/G6/profile.json new file mode 100755 index 0000000..2b54aeb --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G6/profile.json @@ -0,0 +1,24 @@ +{ + "G6": { + "locus_1": "a47cc24760462371e919143c5cc81376", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": 
"a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G6/protein.hits.txt b/tests/test_data/outputs/report/conservative/G6/protein.hits.txt new file mode 100755 index 0000000..f5cf292 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G6/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 1 97.059 100 100 1.28e-18 60.8 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 
256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G7/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G7/nucleotide.hits.txt new file mode 100755 index 0000000..d90535a --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G7/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 3 762 3 762 760 0 100.0 99 99 plus 0.0 1404 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 
ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G7/profile.json 
b/tests/test_data/outputs/report/conservative/G7/profile.json new file mode 100755 index 0000000..601f66d --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G7/profile.json @@ -0,0 +1,24 @@ +{ + "G7": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "796419469778f7ec3851c813f59cfff7", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G7/protein.hits.txt b/tests/test_data/outputs/report/conservative/G7/protein.hits.txt new file mode 100755 index 0000000..f7e8456 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G7/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 
SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 2 254 2 254 253 0 100.0 99 99 0.0 512.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 
638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC 
domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G8/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G8/nucleotide.hits.txt new file mode 100755 index 0000000..aac444b --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G8/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 4 762 4 762 759 0 100.0 99 99 plus 0.0 1402 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 
80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 
^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G8/profile.json b/tests/test_data/outputs/report/conservative/G8/profile.json new file mode 100755 index 0000000..2386944 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G8/profile.json @@ -0,0 +1,24 @@ +{ + "G8": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fd6284b58a891cf02058906c9ee37a00", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + 
"locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G8/protein.hits.txt b/tests/test_data/outputs/report/conservative/G8/protein.hits.txt new file mode 100755 index 0000000..56b16b3 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G8/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 2 254 2 254 253 0 100.0 99 99 0.0 512.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 
5 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/conservative/G9/nucleotide.hits.txt b/tests/test_data/outputs/report/conservative/G9/nucleotide.hits.txt new file mode 100755 index 0000000..56b0986 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G9/nucleotide.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_11:10:0:1 1 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:2 2 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 
16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/conservative/G9/profile.json 
b/tests/test_data/outputs/report/conservative/G9/profile.json new file mode 100755 index 0000000..defcdbe --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G9/profile.json @@ -0,0 +1,24 @@ +{ + "G9": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "-", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G9/protein.hits.txt b/tests/test_data/outputs/report/conservative/G9/protein.hits.txt new file mode 100755 index 0000000..8d4f5d1 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G9/protein.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_11:10:0:1 1 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:2 2 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? 
biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 
1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G1/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G1/nucleotide.hits.txt new file mode 100755 index 0000000..9871163 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G1/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 
428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G1/profile.json 
b/tests/test_data/outputs/report/normal/G1/profile.json new file mode 100755 index 0000000..3f5fdd7 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G1/profile.json @@ -0,0 +1,24 @@ +{ + "G1": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G1/protein.hits.txt b/tests/test_data/outputs/report/normal/G1/protein.hits.txt new file mode 100755 index 0000000..c9b1c8a --- /dev/null +++ b/tests/test_data/outputs/report/normal/G1/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 
638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing 
protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G10/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G10/nucleotide.hits.txt new file mode 100755 index 0000000..fbba040 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G10/nucleotide.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_12:11:0:2 2 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? 
biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 
126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G10/profile.json b/tests/test_data/outputs/report/normal/G10/profile.json new file mode 100755 index 0000000..3c394e9 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G10/profile.json @@ -0,0 +1,24 @@ +{ + "G10": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "-", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at 
end of file diff --git a/tests/test_data/outputs/report/normal/G10/protein.hits.txt b/tests/test_data/outputs/report/normal/G10/protein.hits.txt new file mode 100755 index 0000000..36030d1 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G10/protein.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_12:11:0:2 2 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 
621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G11/nucleotide.hits.txt 
b/tests/test_data/outputs/report/normal/G11/nucleotide.hits.txt new file mode 100755 index 0000000..b2f81da --- /dev/null +++ b/tests/test_data/outputs/report/normal/G11/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 
1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G11/profile.json 
b/tests/test_data/outputs/report/normal/G11/profile.json new file mode 100755 index 0000000..65802a8 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G11/profile.json @@ -0,0 +1,24 @@ +{ + "G11": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "60934464690fea26102d1c8c9acb755d", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G11/protein.hits.txt b/tests/test_data/outputs/report/normal/G11/protein.hits.txt new file mode 100755 index 0000000..33fcef0 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G11/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 
0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/normal/G12/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G12/nucleotide.hits.txt new file mode 100755 index 0000000..119f807 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G12/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 
1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G12/profile.json 
b/tests/test_data/outputs/report/normal/G12/profile.json new file mode 100755 index 0000000..5b138f1 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G12/profile.json @@ -0,0 +1,24 @@ +{ + "G12": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "60934464690fea26102d1c8c9acb755d", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G12/protein.hits.txt b/tests/test_data/outputs/report/normal/G12/protein.hits.txt new file mode 100755 index 0000000..ea3fc4d --- /dev/null +++ b/tests/test_data/outputs/report/normal/G12/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_12:11:0:4 4 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 0 
100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G13/nucleotide.hits.txt 
b/tests/test_data/outputs/report/normal/G13/nucleotide.hits.txt new file mode 100755 index 0000000..a9dead4 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G13/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 
13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G13/profile.json 
b/tests/test_data/outputs/report/normal/G13/profile.json new file mode 100755 index 0000000..29e62b3 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G13/profile.json @@ -0,0 +1,24 @@ +{ + "G13": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "70e88b95c11c37150f37312882af5771", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G13/protein.hits.txt b/tests/test_data/outputs/report/normal/G13/protein.hits.txt new file mode 100755 index 0000000..e711712 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G13/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 
612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git 
a/tests/test_data/outputs/report/normal/G14/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G14/nucleotide.hits.txt new file mode 100755 index 0000000..d0efc02 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G14/nucleotide.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 
b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G14/profile.json 
b/tests/test_data/outputs/report/normal/G14/profile.json new file mode 100755 index 0000000..8752d17 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G14/profile.json @@ -0,0 +1,24 @@ +{ + "G14": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "70e88b95c11c37150f37312882af5771", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G14/protein.hits.txt b/tests/test_data/outputs/report/normal/G14/protein.hits.txt new file mode 100755 index 0000000..f4627bc --- /dev/null +++ b/tests/test_data/outputs/report/normal/G14/protein.hits.txt @@ -0,0 +1,22 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_13:12:0:5 5 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:6 6 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:7 7 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:8 8 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:9 9 16 612 612 1 612 1 612 612 0 
100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:10 10 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:11 11 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:12 12 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:13 13 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:14 14 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:15 15 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:16 16 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:17 17 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:18 18 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:19 19 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:20 20 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G2/nucleotide.hits.txt 
b/tests/test_data/outputs/report/normal/G2/nucleotide.hits.txt new file mode 100755 index 0000000..ca5e559 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G2/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 
1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G2/profile.json 
b/tests/test_data/outputs/report/normal/G2/profile.json new file mode 100755 index 0000000..3b8c03f --- /dev/null +++ b/tests/test_data/outputs/report/normal/G2/profile.json @@ -0,0 +1,24 @@ +{ + "G2": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G2/protein.hits.txt b/tests/test_data/outputs/report/normal/G2/protein.hits.txt new file mode 100755 index 0000000..974fe9e --- /dev/null +++ b/tests/test_data/outputs/report/normal/G2/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 
638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 
543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G3/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G3/nucleotide.hits.txt new file mode 100755 index 0000000..9871163 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G3/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 
428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G3/profile.json 
b/tests/test_data/outputs/report/normal/G3/profile.json new file mode 100755 index 0000000..fbbe3af --- /dev/null +++ b/tests/test_data/outputs/report/normal/G3/profile.json @@ -0,0 +1,24 @@ +{ + "G3": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G3/protein.hits.txt b/tests/test_data/outputs/report/normal/G3/protein.hits.txt new file mode 100755 index 0000000..c9b1c8a --- /dev/null +++ b/tests/test_data/outputs/report/normal/G3/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 
638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing 
protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G4/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G4/nucleotide.hits.txt new file mode 100755 index 0000000..ca5e559 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G4/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 
795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 
SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G4/profile.json b/tests/test_data/outputs/report/normal/G4/profile.json new file mode 100755 index 0000000..f5ece9e --- /dev/null +++ b/tests/test_data/outputs/report/normal/G4/profile.json @@ -0,0 +1,24 @@ +{ + "G4": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": 
"b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G4/protein.hits.txt b/tests/test_data/outputs/report/normal/G4/protein.hits.txt new file mode 100755 index 0000000..974fe9e --- /dev/null +++ b/tests/test_data/outputs/report/normal/G4/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 
427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G5/nucleotide.hits.txt 
b/tests/test_data/outputs/report/normal/G5/nucleotide.hits.txt new file mode 100755 index 0000000..1b3ef48 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G5/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 103 1 97.087 100 100 plus 1.81e-46 172 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 
14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G5/profile.json 
b/tests/test_data/outputs/report/normal/G5/profile.json new file mode 100755 index 0000000..5afae24 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G5/profile.json @@ -0,0 +1,24 @@ +{ + "G5": { + "locus_1": "e9e707ebc64e10a881f1323ebff85369", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G5/protein.hits.txt b/tests/test_data/outputs/report/normal/G5/protein.hits.txt new file mode 100755 index 0000000..8e13c2b --- /dev/null +++ b/tests/test_data/outputs/report/normal/G5/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 2 94.118 100 100 2.04e-17 57.8 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 
638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing 
protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G6/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G6/nucleotide.hits.txt new file mode 100755 index 0000000..d721903 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G6/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 2 98.039 100 100 plus 3.8999999999999997e-48 178 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 1 762 1 762 762 0 100.0 100 100 plus 0.0 1408 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 
795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 
SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G6/profile.json b/tests/test_data/outputs/report/normal/G6/profile.json new file mode 100755 index 0000000..2b54aeb --- /dev/null +++ b/tests/test_data/outputs/report/normal/G6/profile.json @@ -0,0 +1,24 @@ +{ + "G6": { + "locus_1": "a47cc24760462371e919143c5cc81376", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": 
"b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G6/protein.hits.txt b/tests/test_data/outputs/report/normal/G6/protein.hits.txt new file mode 100755 index 0000000..f5cf292 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G6/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 1 97.059 100 100 1.28e-18 60.8 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 1 254 1 254 254 0 100.0 100 100 0.0 514.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 
427 0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G7/nucleotide.hits.txt 
b/tests/test_data/outputs/report/normal/G7/nucleotide.hits.txt new file mode 100755 index 0000000..d90535a --- /dev/null +++ b/tests/test_data/outputs/report/normal/G7/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 3 762 3 762 760 0 100.0 99 99 plus 0.0 1404 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 
1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G7/profile.json 
b/tests/test_data/outputs/report/normal/G7/profile.json new file mode 100755 index 0000000..601f66d --- /dev/null +++ b/tests/test_data/outputs/report/normal/G7/profile.json @@ -0,0 +1,24 @@ +{ + "G7": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "796419469778f7ec3851c813f59cfff7", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G7/protein.hits.txt b/tests/test_data/outputs/report/normal/G7/protein.hits.txt new file mode 100755 index 0000000..f7e8456 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G7/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 2 254 2 254 253 0 100.0 99 99 0.0 512.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 
1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing 
protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G8/nucleotide.hits.txt b/tests/test_data/outputs/report/normal/G8/nucleotide.hits.txt new file mode 100755 index 0000000..aac444b --- /dev/null +++ b/tests/test_data/outputs/report/normal/G8/nucleotide.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 762 762 4 762 4 762 759 0 100.0 99 99 plus 0.0 1402 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 858 858 1 858 1 858 858 0 100.0 100 100 plus 0.0 1585 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 1281 1281 1 1281 1 1281 1281 0 100.0 100 100 plus 0.0 2366 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 1464 1464 1 1464 1 1464 1464 0 100.0 100 100 plus 0.0 2704 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 
80 64 80 80 0 0 +locus_18:17:0:9 9 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 2037 2037 1 2037 1 2037 2037 0 100.0 100 100 plus 0.0 3762 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 285 285 1 285 1 285 285 0 100.0 100 100 plus 1.03e-152 527 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 417 417 1 417 1 417 417 0 100.0 100 100 plus 0.0 771 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 444 444 1 444 1 444 444 0 100.0 100 100 plus 0.0 821 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 
^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 606 606 1 606 1 606 606 0 100.0 100 100 plus 0.0 1120 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G8/profile.json b/tests/test_data/outputs/report/normal/G8/profile.json new file mode 100755 index 0000000..2386944 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G8/profile.json @@ -0,0 +1,24 @@ +{ + "G8": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fd6284b58a891cf02058906c9ee37a00", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": 
"b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G8/protein.hits.txt b/tests/test_data/outputs/report/normal/G8/protein.hits.txt new file mode 100755 index 0000000..56b16b3 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G8/protein.hits.txt @@ -0,0 +1,21 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_10:9:0:1 1 9 254 254 2 254 2 254 253 0 100.0 99 99 0.0 512.0 9 locus_10 SALM_1891 200.96 5'/3'-nucleotidase SurE J 762 fe04d17ec353c08b903c85fc0ca4dc02 254 bd09702e070040e0fc8d2ec3b830812c 533.4 990.6 177.8 330.2 80 64 80 80 0 0 +locus_11:10:0:2 2 10 286 286 1 286 1 286 286 0 100.0 100 100 0.0 579.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:3 3 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:4 4 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:5 5 13 427 427 1 427 1 427 427 
0 100.0 100 100 0.0 869.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:6 6 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:7 7 15 488 488 1 488 1 488 488 0 100.0 100 100 0.0 1004.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:8 8 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:9 9 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:10 10 18 679 679 1 679 1 679 679 0 100.0 100 100 0.0 1419.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:11 11 1 95 95 1 95 1 95 95 0 100.0 100 100 1.2299999999999998e-68 191.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:12 12 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:13 13 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:14 14 3 139 139 1 139 1 139 139 0 100.0 100 100 2.34e-106 290.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:15 15 4 148 148 1 148 1 148 148 0 100.0 100 100 1.0400000000000001e-110 302.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:16 16 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:17 17 6 202 202 1 202 1 202 202 0 100.0 100 100 3.71e-154 416.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:18 18 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:19 19 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G9/nucleotide.hits.txt 
b/tests/test_data/outputs/report/normal/G9/nucleotide.hits.txt new file mode 100755 index 0000000..56b0986 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G9/nucleotide.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 102 102 1 102 1 102 102 0 100.0 100 100 plus 1.8e-51 189 0 locus_1 SALM_11273 ! hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_11:10:0:1 1 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:2 2 11 972 972 1 972 1 972 972 0 100.0 100 100 plus 0.0 1796 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 1098 1098 1 1098 1 1098 1098 0 100.0 100 100 plus 0.0 2028 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 1434 1434 1 1434 1 1434 1434 0 100.0 100 100 plus 0.0 2649 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 
6 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 1836 1836 1 1836 1 1836 1836 0 100.0 100 100 plus 0.0 3391 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 1914 1914 1 1914 1 1914 1914 0 100.0 100 100 plus 0.0 3535 17 locus_18 SALM_1997 ? biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.3100000000000004e-124 433 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 4935 4935 1 4935 1 4935 4935 0 100.0 100 100 plus 0.0 9114 19 locus_20 SALM_6064 . 
alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 327 327 1 327 1 327 327 0 100.0 100 100 plus 5.34e-176 604 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 543 543 1 543 1 543 543 0 100.0 100 100 plus 0.0 1003 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 642 642 1 642 1 642 642 0 100.0 100 100 plus 0.0 1186 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 684 684 1 684 1 684 684 0 100.0 100 100 plus 0.0 1264 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/report/normal/G9/profile.json 
b/tests/test_data/outputs/report/normal/G9/profile.json new file mode 100755 index 0000000..defcdbe --- /dev/null +++ b/tests/test_data/outputs/report/normal/G9/profile.json @@ -0,0 +1,24 @@ +{ + "G9": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "-", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G9/protein.hits.txt b/tests/test_data/outputs/report/normal/G9/protein.hits.txt new file mode 100755 index 0000000..8d4f5d1 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G9/protein.hits.txt @@ -0,0 +1,20 @@ +query_name qseqid sseqid qlen slen qstart qend sstart send length mismatch pident qcovhsp qcovs sstrand evalue bitscore seq_id locus_name locus_name_alt locus_product locus_description locus_uid dna_seq_len dna_seq_hash aa_seq_len aa_seq_hash dna_min_len dna_max_len aa_min_len aa_max_len dna_min_ident aa_min_ident min_dna_match_cov min_aa_match_cov count_int_stops dna_ambig_count +locus_1:0:0:0 0 0 34 34 1 34 1 34 34 0 100.0 100 100 4.94e-20 64.3 0 locus_1 SALM_11273 ! 
hypothetical protein 1 102 d17b02d12afa7f832ee37df6f24a8f55 34 84ac553cb45dd790c497c27152888f02 71.4 132.6 23.8 44.2 80 64 80 80 0 0 +locus_11:10:0:1 1 10 286 286 1 286 1 286 286 17 94.056 100 100 0.0 518.0 10 locus_11 SALM_1452 | 1-phosphofructokinase K 858 5b128d659955716833ce42f2bb060212 286 4daecf1a6ccae76e1d97a4ce9ee4ff0b 600.6 1115.4 200.2 371.8 80 64 80 80 0 0 +locus_12:11:0:2 2 11 324 324 1 324 1 324 324 0 100.0 100 100 0.0 640.0 11 locus_12 SALM_11020 _ 1 L 972 eb72da68c159497d5f0c8eeddc51b5ae 324 a1f16dd269dc295e715f2d00f9c26b43 680.4 1263.6 226.8 421.2 80 64 80 80 0 0 +locus_13:12:0:3 3 12 366 366 1 366 1 366 366 0 100.0 100 100 0.0 754.0 12 locus_13 SALM_1934 - 3.9 M 1098 8f300259dcb46224bdc1fe5273107324 366 b16bc26e42f5bc4327504b1ec8b2d53d 768.6 1427.4 256.2 475.8 80 64 80 80 0 0 +locus_14:13:0:4 4 13 427 427 1 427 1 427 427 9 97.892 100 100 0.0 846.0 13 locus_14 SALM_2871 + @ N 1281 b9060019038526aa6fc38d2f7510edc6 427 ed11561b2bedaa12c7a28eb0e9346101 896.7 1665.3 298.9 555.1 80 64 80 80 0 0 +locus_15:14:0:5 5 14 478 478 1 478 1 478 478 0 100.0 100 100 0.0 972.0 14 locus_15 SALM_583 = DMT family transporter O 1434 bc98c2fe196a68a79036814396513a8d 478 16cb2acec887d5861327e04f2705b8ce 1003.8 1864.2 334.6 621.4 80 64 80 80 0 0 +locus_16:15:0:6 6 15 488 488 1 488 1 488 488 14 97.131 100 100 0.0 972.0 15 locus_16 SALM_780 < murein transglycosylase A P 1464 16e55766c603fe33c9e75d8e81743ae2 488 b20314e55f9713235e9c4ea5817b56df 1024.8 1903.2 341.6 634.4 80 64 80 80 0 0 +locus_17:16:0:7 7 16 612 612 1 612 1 612 612 0 100.0 100 100 0.0 1241.0 16 locus_17 SALM_1937 > GTPase HflX Q 1836 a0d97d985483413f3c18bfe5833ae9ce 612 d6012eddc7a6e8d7d40761d00ed71a5a 1285.2 2386.8 428.4 795.6 80 64 80 80 0 0 +locus_18:17:0:8 8 17 638 638 1 638 1 638 638 0 100.0 100 100 0.0 1326.0 17 locus_18 SALM_1997 ? 
biosynthetic arginine decarboxylase R 1914 b3021e979faa7600756c06dfadfcf14c 638 e68e83956ee1d4c685571e5348c8def1 1339.8 2488.2 446.6 829.4 80 64 80 80 0 0 +locus_19:18:0:9 9 18 679 679 1 679 1 679 679 8 98.822 100 100 0.0 1403.0 18 locus_19 SALM_9926 , https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662 S 2037 a012eee23637b48e39b00808a057e35d 679 1302fea1dbd5bb756db34e09b863ad44 1425.9 2648.1 475.3 882.7 80 64 80 80 0 0 +locus_2:1:0:10 10 1 95 95 1 95 1 95 95 11 88.421 100 100 2.89e-51 147.0 1 locus_2 SALM_120 @ outer membrane protein B 285 e35184c8ff18e9116fc8faef20532f56 95 2d9e7f7f0d11bf02db860b0799e7924d 199.5 370.5 66.5 123.5 80 64 80 80 0 0 +locus_20:19:0:11 11 19 1645 1645 1 1645 1 1645 1645 0 100.0 100 100 0.0 3332.0 19 locus_20 SALM_6064 . alpha-2-macroglobulin family protein T 4935 4461918e985715e4a2b07494e1f91326 1645 12ff39a1933fa478729b142976b0a659 3454.5 6415.5 1151.5 2138.5 80 64 80 80 0 0 +locus_3:2:0:12 12 2 109 109 1 109 1 109 109 0 100.0 100 100 3.09e-81 224.0 2 locus_3 SALM_2016 # tRNA (guanosine(46)-N7)-methyltransferase TrmB C 327 670705cd2a59c4a23a897ac656a888fe 109 4277f8bbf1fd6682001da089fea83d04 228.9 425.1 76.3 141.7 80 64 80 80 0 0 +locus_4:3:0:13 13 3 139 139 1 139 1 139 139 9 93.525 100 100 3.6999999999999996e-98 269.0 3 locus_4 SALM_8644 $ AZ624_004720 AZ624_004720 417 ac1b21798c0f672ad26f5a91ea278590 139 0c25367401155278f34832f184ab44e6 291.9 542.1 97.3 180.7 80 64 80 80 0 0 +locus_5:4:0:14 14 4 148 148 1 148 1 148 148 11 92.568 100 100 1.5600000000000001e-97 268.0 4 locus_5 SALM_1876 % SPI-1 type III secretion system invasion lipoprotein InvH E 444 d00defcca8588f21ce16fa1d0ac13389 148 c8bf12a8057fc5e541fcd4924136a40d 310.8 577.2 103.6 192.4 80 64 80 80 0 0 +locus_6:5:0:15 15 5 181 181 1 181 1 181 181 0 100.0 100 100 1.7899999999999999e-140 380.0 5 locus_6 SALM_640 ^ MOSC domain-containing protein F 543 a11561f2804e2c32c78049f8b9aeb517 181 
1f6a45ea291940ef2c17ec1cfdd5fbdd 380.1 705.9 126.7 235.3 80 64 80 80 0 0 +locus_7:6:0:16 16 6 202 202 1 202 1 202 202 14 93.069 100 100 2.36e-141 384.0 6 locus_7 SALM_1501 & India: Vellore G 606 dc94bf1ec4ff9bed2a1f460cbd958656 202 62252b3326997117f127efb88ff09294 424.2 787.8 141.4 262.6 80 64 80 80 0 0 +locus_8:7:0:17 17 7 214 214 1 214 1 214 214 0 100.0 100 100 1.75e-157 426.0 7 locus_8 SALM_756 * DNA polymerase III subunit delta' H 642 7ebe74afecf146ec4db816c8deced64f 214 2449629747a7c58f0f2cad411db87178 449.4 834.6 149.8 278.2 80 64 80 80 0 0 +locus_9:8:0:18 18 8 228 228 1 228 1 228 228 0 100.0 100 100 6.32e-172 463.0 8 locus_9 SALM_7353 1 fimbrial assembly chaperone I 684 41ebb36872854b2b33c8c028e23d8ad1 228 a75a93991228940fb46d917f238beb5a 478.8 889.2 159.6 296.4 80 64 80 80 0 0 diff --git a/tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..cdab2ba --- /dev/null +++ b/tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 
417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..ce3c3a9 --- /dev/null +++ b/tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G1/blast/protein/hsps.txt b/tests/test_data/outputs/search/G1/blast/protein/hsps.txt new file mode 100755 index 0000000..233979d --- /dev/null +++ b/tests/test_data/outputs/search/G1/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G1/blast/protein/queries.fasta b/tests/test_data/outputs/search/G1/blast/protein/queries.fasta new file mode 100755 index 0000000..3ac162b --- /dev/null +++ b/tests/test_data/outputs/search/G1/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G1/run.json b/tests/test_data/outputs/search/G1/run.json new file mode 100755 index 0000000..c036a0b --- /dev/null +++ b/tests/test_data/outputs/search/G1/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:11:32", + "parameters": { + "query": "locidex/extract/G1/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G1", + "name": "G1", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G1/seq_store.json", + "analysis_end_time": "10/06/2024 11:11:34" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G1/seq_store.json 
b/tests/test_data/outputs/search/G1/seq_store.json new file mode 100755 index 0000000..c96c860 --- /dev/null +++ b/tests/test_data/outputs/search/G1/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G1", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": "736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + 
"bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 
488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "11": { + "nucleotide": [ + 
{ + "qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + 
"mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + "pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + 
], + "protein": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, 
+ "locus_7": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..d91fbd6 --- /dev/null +++ b/tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt @@ -0,0 +1,19 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +3 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +4 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +5 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +6 15 1464 1464 1 
1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +7 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +8 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +9 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +10 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +11 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +12 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +13 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +14 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +15 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +16 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +17 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +18 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..2bab250 --- /dev/null +++ b/tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta @@ -0,0 +1,38 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>13 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>14 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G10/blast/protein/hsps.txt b/tests/test_data/outputs/search/G10/blast/protein/hsps.txt new file mode 100755 index 0000000..e6894a0 --- /dev/null +++ b/tests/test_data/outputs/search/G10/blast/protein/hsps.txt @@ -0,0 +1,19 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +3 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +4 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +5 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +6 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +7 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +8 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +9 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +10 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +11 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +12 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +13 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +14 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +15 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +16 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +17 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +18 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G10/blast/protein/queries.fasta b/tests/test_data/outputs/search/G10/blast/protein/queries.fasta new file mode 100755 index 0000000..4396cd1 --- /dev/null +++ b/tests/test_data/outputs/search/G10/blast/protein/queries.fasta @@ -0,0 +1,38 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>3 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>4 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>5 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>6 
+MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>7 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>8 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>9 
+MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>10 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>11 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKR
RAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>12 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>13 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>14 +MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>15 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>16 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>17 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>18 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G10/run.json b/tests/test_data/outputs/search/G10/run.json new file mode 100755 index 0000000..3ea5601 --- /dev/null +++ b/tests/test_data/outputs/search/G10/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:27", + "parameters": 
{ + "query": "locidex/extract/G10/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G10", + "name": "G10", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G10/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:29" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G10/seq_store.json b/tests/test_data/outputs/search/G10/seq_store.json new file mode 100755 index 0000000..0834466 --- /dev/null +++ b/tests/test_data/outputs/search/G10/seq_store.json @@ -0,0 +1,1685 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 
285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + 
"dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + 
"locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, 
+ "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": 
"murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + 
"aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G10", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_12:11:0:2", + "locus_name": "locus_12:11:0:2", + "seq_id": "locus_12:11:0:2", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + 
"parent_id": "locus_13:12:0:3", + "locus_name": "locus_13:12:0:3", + "seq_id": "locus_13:12:0:3", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_14:13:0:4", + "locus_name": "locus_14:13:0:4", + "seq_id": "locus_14:13:0:4", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_15:14:0:5", + "locus_name": "locus_15:14:0:5", + "seq_id": "locus_15:14:0:5", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_16:15:0:6", + "locus_name": "locus_16:15:0:6", + "seq_id": "locus_16:15:0:6", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_17:16:0:7", + "locus_name": "locus_17:16:0:7", + "seq_id": "locus_17:16:0:7", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_18:17:0:8", + "locus_name": "locus_18:17:0:8", + "seq_id": "locus_18:17:0:8", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + 
"stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_19:18:0:9", + "locus_name": "locus_19:18:0:9", + "seq_id": "locus_19:18:0:9", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + "aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_2:1:0:10", + "locus_name": "locus_2:1:0:10", + "seq_id": "locus_2:1:0:10", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_20:19:0:11", + "locus_name": "locus_20:19:0:11", + "seq_id": "locus_20:19:0:11", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_3:2:0:12", + "locus_name": "locus_3:2:0:12", + "seq_id": "locus_3:2:0:12", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_4:3:0:13", + "locus_name": "locus_4:3:0:13", + "seq_id": "locus_4:3:0:13", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_5:4:0:14", + "locus_name": "locus_5:4:0:14", + "seq_id": "locus_5:4:0:14", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": 
"82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_6:5:0:15", + "locus_name": "locus_6:5:0:15", + "seq_id": "locus_6:5:0:15", + "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_7:6:0:16", + "locus_name": "locus_7:6:0:16", + "seq_id": "locus_7:6:0:16", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_8:7:0:17", + "locus_name": "locus_8:7:0:17", + "seq_id": "locus_8:7:0:17", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_9:8:0:18", + "locus_name": "locus_9:8:0:18", + "seq_id": "locus_9:8:0:18", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + 
"sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + "qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "4": { + 
"nucleotide": [ + { + "qseqid": 4, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + 
"pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 1, + "qlen": 95, 
+ "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 
100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 7, + "qlen": 
642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 17, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_3": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + }, + "locus_4": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_5": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_6": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_7": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_8": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_9": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [], + "protein": [] + }, + "locus_12": { + 
"nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_13": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_14": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_15": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_16": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_17": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_18": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_19": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_20": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..ee96839 --- /dev/null +++ b/tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +12 1 285 285 1 285 1 
285 285 17 94.035 100 100 plus 2.31e-124 433 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..1e6d0ea --- /dev/null +++ b/tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>5 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G11/blast/protein/hsps.txt b/tests/test_data/outputs/search/G11/blast/protein/hsps.txt new file mode 100755 index 0000000..f9da50e --- /dev/null +++ b/tests/test_data/outputs/search/G11/blast/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +12 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +16 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G11/blast/protein/queries.fasta b/tests/test_data/outputs/search/G11/blast/protein/queries.fasta new file mode 100755 index 0000000..7d902a9 --- /dev/null +++ b/tests/test_data/outputs/search/G11/blast/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>16 +MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G11/run.json b/tests/test_data/outputs/search/G11/run.json new file mode 100755 index 0000000..fe00e54 --- /dev/null +++ b/tests/test_data/outputs/search/G11/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:32", + "parameters": { + "query": "locidex/extract/G11/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G11", + "name": "G11", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": 
"locidex/search/G11/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:34" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G11/seq_store.json b/tests/test_data/outputs/search/G11/seq_store.json new file mode 100755 index 0000000..0b6bcd9 --- /dev/null +++ b/tests/test_data/outputs/search/G11/seq_store.json @@ -0,0 +1,1801 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA 
(guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + 
"dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + 
"locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + 
"dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + 
"locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 
4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G11", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_12:11:0:4", + "locus_name": "locus_12:11:0:4", + "seq_id": "locus_12:11:0:4", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 
972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_13:12:0:5", + "locus_name": "locus_13:12:0:5", + "seq_id": "locus_13:12:0:5", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_14:13:0:6", + "locus_name": "locus_14:13:0:6", + "seq_id": "locus_14:13:0:6", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_15:14:0:7", + "locus_name": "locus_15:14:0:7", + "seq_id": "locus_15:14:0:7", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_16:15:0:8", + "locus_name": "locus_16:15:0:8", + "seq_id": "locus_16:15:0:8", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_17:16:0:9", + "locus_name": "locus_17:16:0:9", + "seq_id": "locus_17:16:0:9", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_18:17:0:10", + "locus_name": "locus_18:17:0:10", + 
"seq_id": "locus_18:17:0:10", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_19:18:0:11", + "locus_name": "locus_19:18:0:11", + "seq_id": "locus_19:18:0:11", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + "aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_2:1:0:12", + "locus_name": "locus_2:1:0:12", + "seq_id": "locus_2:1:0:12", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_20:19:0:13", + "locus_name": "locus_20:19:0:13", + "seq_id": "locus_20:19:0:13", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_3:2:0:14", + "locus_name": "locus_3:2:0:14", + "seq_id": "locus_3:2:0:14", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_4:3:0:15", + "locus_name": "locus_4:3:0:15", + "seq_id": "locus_4:3:0:15", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + 
}, + "16": { + "parent_id": "locus_5:4:0:16", + "locus_name": "locus_5:4:0:16", + "seq_id": "locus_5:4:0:16", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": "736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_6:5:0:17", + "locus_name": "locus_6:5:0:17", + "seq_id": "locus_6:5:0:17", + "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_7:6:0:18", + "locus_name": "locus_7:6:0:18", + "seq_id": "locus_7:6:0:18", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_8:7:0:19", + "locus_name": "locus_8:7:0:19", + "seq_id": "locus_8:7:0:19", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "20": { + "parent_id": "locus_9:8:0:20", + "locus_name": "locus_9:8:0:20", + "seq_id": "locus_9:8:0:20", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + 
"qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + "qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + 
"sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + }, + 
"7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + 
"mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + 
"qseqid": 13, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + 
"pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "20": { + 
"nucleotide": [ + { + "qseqid": 20, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 20, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + }, + "locus_3": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_4": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_5": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_6": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_7": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_8": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_9": { + "nucleotide": [ + "20" + ], + "protein": [ + "20" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "4", + "3" + ], + "protein": [ + "4", + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_14": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_15": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_16": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_17": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_18": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + 
"locus_19": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_20": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..9ebac93 --- /dev/null +++ b/tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +12 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +16 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 
1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..80602ca --- /dev/null +++ b/tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>5 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G12/blast/protein/hsps.txt b/tests/test_data/outputs/search/G12/blast/protein/hsps.txt new file mode 100755 index 0000000..553fd81 --- /dev/null +++ b/tests/test_data/outputs/search/G12/blast/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +12 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +16 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G12/blast/protein/queries.fasta b/tests/test_data/outputs/search/G12/blast/protein/queries.fasta new file mode 100755 index 0000000..a75c35c --- /dev/null +++ b/tests/test_data/outputs/search/G12/blast/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>16 +MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G12/run.json b/tests/test_data/outputs/search/G12/run.json new file mode 100755 index 0000000..af189eb --- /dev/null +++ b/tests/test_data/outputs/search/G12/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:38", + "parameters": { + "query": "locidex/extract/G12/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G12", + "name": "G12", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": 
"locidex/search/G12/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:40" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G12/seq_store.json b/tests/test_data/outputs/search/G12/seq_store.json new file mode 100755 index 0000000..1fd6a02 --- /dev/null +++ b/tests/test_data/outputs/search/G12/seq_store.json @@ -0,0 +1,1801 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA 
(guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + 
"dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + 
"locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + 
"dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + 
"locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 
4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G12", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "5b128d659955716833ce42f2bb060212", + "dna_len": 858, + "aa_hash": "d6a46f107d0604f27820147b523948c8", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_12:11:0:4", + "locus_name": "locus_12:11:0:4", + "seq_id": "locus_12:11:0:4", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 
972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_13:12:0:5", + "locus_name": "locus_13:12:0:5", + "seq_id": "locus_13:12:0:5", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_14:13:0:6", + "locus_name": "locus_14:13:0:6", + "seq_id": "locus_14:13:0:6", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_15:14:0:7", + "locus_name": "locus_15:14:0:7", + "seq_id": "locus_15:14:0:7", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_16:15:0:8", + "locus_name": "locus_16:15:0:8", + "seq_id": "locus_16:15:0:8", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_17:16:0:9", + "locus_name": "locus_17:16:0:9", + "seq_id": "locus_17:16:0:9", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_18:17:0:10", + "locus_name": "locus_18:17:0:10", + 
"seq_id": "locus_18:17:0:10", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_19:18:0:11", + "locus_name": "locus_19:18:0:11", + "seq_id": "locus_19:18:0:11", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + "aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_2:1:0:12", + "locus_name": "locus_2:1:0:12", + "seq_id": "locus_2:1:0:12", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_20:19:0:13", + "locus_name": "locus_20:19:0:13", + "seq_id": "locus_20:19:0:13", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_3:2:0:14", + "locus_name": "locus_3:2:0:14", + "seq_id": "locus_3:2:0:14", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_4:3:0:15", + "locus_name": "locus_4:3:0:15", + "seq_id": "locus_4:3:0:15", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + 
}, + "16": { + "parent_id": "locus_5:4:0:16", + "locus_name": "locus_5:4:0:16", + "seq_id": "locus_5:4:0:16", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_6:5:0:17", + "locus_name": "locus_6:5:0:17", + "seq_id": "locus_6:5:0:17", + "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_7:6:0:18", + "locus_name": "locus_7:6:0:18", + "seq_id": "locus_7:6:0:18", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_8:7:0:19", + "locus_name": "locus_8:7:0:19", + "seq_id": "locus_8:7:0:19", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "20": { + "parent_id": "locus_9:8:0:20", + "locus_name": "locus_9:8:0:20", + "seq_id": "locus_9:8:0:20", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + 
"qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + "qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1585 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 579.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + 
"sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + "7": 
{ + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 
0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 13, + 
"sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + 
"qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "20": { + "nucleotide": [ + { + "qseqid": 20, + "sseqid": 8, 
+ "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 20, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + }, + "locus_3": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_4": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_5": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_6": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_7": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_8": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_9": { + "nucleotide": [ + "20" + ], + "protein": [ + "20" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3", + "4" + ], + "protein": [ + "3", + "4" + ] + }, + "locus_13": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_14": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_15": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_16": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_17": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_18": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_19": { + "nucleotide": [ + "11" + ], + 
"protein": [ + "11" + ] + }, + "locus_20": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..a515bcf --- /dev/null +++ b/tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 
1264 diff --git a/tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..00e5ec3 --- /dev/null +++ b/tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G13/blast/protein/hsps.txt b/tests/test_data/outputs/search/G13/blast/protein/hsps.txt new file mode 100755 index 0000000..b0cf610 --- /dev/null +++ b/tests/test_data/outputs/search/G13/blast/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +12 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +16 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G13/blast/protein/queries.fasta b/tests/test_data/outputs/search/G13/blast/protein/queries.fasta new file mode 100755 index 0000000..7cb4274 --- /dev/null +++ b/tests/test_data/outputs/search/G13/blast/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>16 +MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G13/run.json b/tests/test_data/outputs/search/G13/run.json new file mode 100755 index 0000000..9e512b7 --- /dev/null +++ b/tests/test_data/outputs/search/G13/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:44", + "parameters": { + "query": "locidex/extract/G13/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G13", + "name": "G13", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": 
"locidex/search/G13/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:46" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G13/seq_store.json b/tests/test_data/outputs/search/G13/seq_store.json new file mode 100755 index 0000000..7e156b9 --- /dev/null +++ b/tests/test_data/outputs/search/G13/seq_store.json @@ -0,0 +1,1801 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA 
(guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + 
"dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + 
"locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + 
"dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + 
"locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 
4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G13", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 
1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_13:12:0:5", + "locus_name": "locus_13:12:0:5", + "seq_id": "locus_13:12:0:5", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_14:13:0:6", + "locus_name": "locus_14:13:0:6", + "seq_id": "locus_14:13:0:6", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_15:14:0:7", + "locus_name": "locus_15:14:0:7", + "seq_id": "locus_15:14:0:7", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_16:15:0:8", + "locus_name": "locus_16:15:0:8", + "seq_id": "locus_16:15:0:8", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_17:16:0:9", + "locus_name": "locus_17:16:0:9", + "seq_id": "locus_17:16:0:9", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_18:17:0:10", + "locus_name": "locus_18:17:0:10", + 
"seq_id": "locus_18:17:0:10", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_19:18:0:11", + "locus_name": "locus_19:18:0:11", + "seq_id": "locus_19:18:0:11", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + "aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_2:1:0:12", + "locus_name": "locus_2:1:0:12", + "seq_id": "locus_2:1:0:12", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_20:19:0:13", + "locus_name": "locus_20:19:0:13", + "seq_id": "locus_20:19:0:13", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_3:2:0:14", + "locus_name": "locus_3:2:0:14", + "seq_id": "locus_3:2:0:14", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_4:3:0:15", + "locus_name": "locus_4:3:0:15", + "seq_id": "locus_4:3:0:15", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + 
}, + "16": { + "parent_id": "locus_5:4:0:16", + "locus_name": "locus_5:4:0:16", + "seq_id": "locus_5:4:0:16", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": "736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_6:5:0:17", + "locus_name": "locus_6:5:0:17", + "seq_id": "locus_6:5:0:17", + "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_7:6:0:18", + "locus_name": "locus_7:6:0:18", + "seq_id": "locus_7:6:0:18", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_8:7:0:19", + "locus_name": "locus_8:7:0:19", + "seq_id": "locus_8:7:0:19", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "20": { + "parent_id": "locus_9:8:0:20", + "locus_name": "locus_9:8:0:20", + "seq_id": "locus_9:8:0:20", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + 
"qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + "qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + 
"sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + 
}, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, 
+ "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { 
+ "qseqid": 13, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + 
"pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "20": { + 
"nucleotide": [ + { + "qseqid": 20, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 20, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + }, + "locus_3": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_4": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_5": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_6": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_7": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_8": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_9": { + "nucleotide": [ + "20" + ], + "protein": [ + "20" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "5", + "4" + ], + "protein": [ + "5", + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_15": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_16": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_17": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_18": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + 
"locus_19": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_20": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..3324a8b --- /dev/null +++ b/tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +12 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +16 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 
1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..2da4d02 --- /dev/null +++ b/tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G14/blast/protein/hsps.txt b/tests/test_data/outputs/search/G14/blast/protein/hsps.txt new file mode 100755 index 0000000..6423de4 --- /dev/null +++ b/tests/test_data/outputs/search/G14/blast/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +12 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +16 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G14/blast/protein/queries.fasta b/tests/test_data/outputs/search/G14/blast/protein/queries.fasta new file mode 100755 index 0000000..285e5c8 --- /dev/null +++ b/tests/test_data/outputs/search/G14/blast/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>16 +MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G14/run.json b/tests/test_data/outputs/search/G14/run.json new file mode 100755 index 0000000..179734a --- /dev/null +++ b/tests/test_data/outputs/search/G14/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:49", + "parameters": { + "query": "locidex/extract/G14/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G14", + "name": "G14", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": 
"locidex/search/G14/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:51" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G14/seq_store.json b/tests/test_data/outputs/search/G14/seq_store.json new file mode 100755 index 0000000..9ee016f --- /dev/null +++ b/tests/test_data/outputs/search/G14/seq_store.json @@ -0,0 +1,1801 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA 
(guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + 
"dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + 
"locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + 
"dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + 
"locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 
4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G14", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "5b128d659955716833ce42f2bb060212", + "dna_len": 858, + "aa_hash": "d6a46f107d0604f27820147b523948c8", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 
1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_13:12:0:5", + "locus_name": "locus_13:12:0:5", + "seq_id": "locus_13:12:0:5", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_14:13:0:6", + "locus_name": "locus_14:13:0:6", + "seq_id": "locus_14:13:0:6", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_15:14:0:7", + "locus_name": "locus_15:14:0:7", + "seq_id": "locus_15:14:0:7", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_16:15:0:8", + "locus_name": "locus_16:15:0:8", + "seq_id": "locus_16:15:0:8", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_17:16:0:9", + "locus_name": "locus_17:16:0:9", + "seq_id": "locus_17:16:0:9", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_18:17:0:10", + "locus_name": "locus_18:17:0:10", + 
"seq_id": "locus_18:17:0:10", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_19:18:0:11", + "locus_name": "locus_19:18:0:11", + "seq_id": "locus_19:18:0:11", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + "aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_2:1:0:12", + "locus_name": "locus_2:1:0:12", + "seq_id": "locus_2:1:0:12", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_20:19:0:13", + "locus_name": "locus_20:19:0:13", + "seq_id": "locus_20:19:0:13", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_3:2:0:14", + "locus_name": "locus_3:2:0:14", + "seq_id": "locus_3:2:0:14", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_4:3:0:15", + "locus_name": "locus_4:3:0:15", + "seq_id": "locus_4:3:0:15", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + 
}, + "16": { + "parent_id": "locus_5:4:0:16", + "locus_name": "locus_5:4:0:16", + "seq_id": "locus_5:4:0:16", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_6:5:0:17", + "locus_name": "locus_6:5:0:17", + "seq_id": "locus_6:5:0:17", + "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_7:6:0:18", + "locus_name": "locus_7:6:0:18", + "seq_id": "locus_7:6:0:18", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_8:7:0:19", + "locus_name": "locus_8:7:0:19", + "seq_id": "locus_8:7:0:19", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "20": { + "parent_id": "locus_9:8:0:20", + "locus_name": "locus_9:8:0:20", + "seq_id": "locus_9:8:0:20", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + 
"qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + "qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1585 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 579.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + 
"sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + 
"7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + 
"mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + 
"qseqid": 13, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + 
"qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 17, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "20": { + "nucleotide": [ + { + "qseqid": 
20, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 20, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + }, + "locus_3": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_4": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_5": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_6": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_7": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_8": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_9": { + "nucleotide": [ + "20" + ], + "protein": [ + "20" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4", + "5" + ], + "protein": [ + "4", + "5" + ] + }, + "locus_14": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_15": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_16": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_17": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_18": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_19": { + "nucleotide": [ + 
"11" + ], + "protein": [ + "11" + ] + }, + "locus_20": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..00093e5 --- /dev/null +++ b/tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git 
a/tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..9cad9a4 --- /dev/null +++ b/tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G2/blast/protein/hsps.txt b/tests/test_data/outputs/search/G2/blast/protein/hsps.txt new file mode 100755 index 0000000..73033ac --- /dev/null +++ b/tests/test_data/outputs/search/G2/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G2/blast/protein/queries.fasta b/tests/test_data/outputs/search/G2/blast/protein/queries.fasta new file mode 100755 index 0000000..34499a1 --- /dev/null +++ b/tests/test_data/outputs/search/G2/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G2/run.json b/tests/test_data/outputs/search/G2/run.json new file mode 100755 index 0000000..849db03 --- /dev/null +++ b/tests/test_data/outputs/search/G2/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:11:38", + "parameters": { + "query": "locidex/extract/G2/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G2", + "name": "G2", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G2/seq_store.json", + "analysis_end_time": "10/06/2024 11:11:41" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G2/seq_store.json 
b/tests/test_data/outputs/search/G2/seq_store.json new file mode 100755 index 0000000..d2f120d --- /dev/null +++ b/tests/test_data/outputs/search/G2/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G2", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "5b128d659955716833ce42f2bb060212", + "dna_len": 858, + "aa_hash": "d6a46f107d0604f27820147b523948c8", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1585 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 579.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + 
"bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + 
"mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "11": { + "nucleotide": [ + { + 
"qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + 
"pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 17, 
+ "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_7": { + "nucleotide": [ + "17" 
+ ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..cdab2ba --- /dev/null +++ b/tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 
+7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..ce3c3a9 --- /dev/null +++ b/tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G3/blast/protein/hsps.txt b/tests/test_data/outputs/search/G3/blast/protein/hsps.txt new file mode 100755 index 0000000..233979d --- /dev/null +++ b/tests/test_data/outputs/search/G3/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G3/blast/protein/queries.fasta b/tests/test_data/outputs/search/G3/blast/protein/queries.fasta new file mode 100755 index 0000000..3ac162b --- /dev/null +++ b/tests/test_data/outputs/search/G3/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G3/run.json b/tests/test_data/outputs/search/G3/run.json new file mode 100755 index 0000000..a346eb7 --- /dev/null +++ b/tests/test_data/outputs/search/G3/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:11:45", + "parameters": { + "query": "locidex/extract/G3/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G3", + "name": "G3", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G3/seq_store.json", + "analysis_end_time": "10/06/2024 11:11:47" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G3/seq_store.json 
b/tests/test_data/outputs/search/G3/seq_store.json new file mode 100755 index 0000000..67c8d69 --- /dev/null +++ b/tests/test_data/outputs/search/G3/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G3", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": "736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + 
"bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 
488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "11": { + "nucleotide": [ + 
{ + "qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + 
"mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + "pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + 
], + "protein": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, 
+ "locus_7": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..00093e5 --- /dev/null +++ b/tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 
1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..9cad9a4 --- /dev/null +++ b/tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G4/blast/protein/hsps.txt b/tests/test_data/outputs/search/G4/blast/protein/hsps.txt new file mode 100755 index 0000000..73033ac --- /dev/null +++ b/tests/test_data/outputs/search/G4/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G4/blast/protein/queries.fasta b/tests/test_data/outputs/search/G4/blast/protein/queries.fasta new file mode 100755 index 0000000..34499a1 --- /dev/null +++ b/tests/test_data/outputs/search/G4/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G4/run.json b/tests/test_data/outputs/search/G4/run.json new file mode 100755 index 0000000..3c42732 --- /dev/null +++ b/tests/test_data/outputs/search/G4/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:11:51", + "parameters": { + "query": "locidex/extract/G4/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G4", + "name": "G4", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G4/seq_store.json", + "analysis_end_time": "10/06/2024 11:11:53" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G4/seq_store.json 
b/tests/test_data/outputs/search/G4/seq_store.json new file mode 100755 index 0000000..3238da5 --- /dev/null +++ b/tests/test_data/outputs/search/G4/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G4", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "5b128d659955716833ce42f2bb060212", + "dna_len": 858, + "aa_hash": "d6a46f107d0604f27820147b523948c8", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1585 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 579.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + 
"bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + 
"mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "11": { + "nucleotide": [ + { + 
"qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + 
"pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 17, 
+ "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_7": { + "nucleotide": [ + "17" 
+ ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..d8538d1 --- /dev/null +++ b/tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 103 1 97.087 100 100 plus 1.81e-46 172 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 
+7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..306e3c9 --- /dev/null +++ b/tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G5/blast/protein/hsps.txt b/tests/test_data/outputs/search/G5/blast/protein/hsps.txt new file mode 100755 index 0000000..823a08e --- /dev/null +++ b/tests/test_data/outputs/search/G5/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 2 94.118 100 100 N/A 2.04e-17 57.8 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G5/blast/protein/queries.fasta b/tests/test_data/outputs/search/G5/blast/protein/queries.fasta new file mode 100755 index 0000000..46a4b87 --- /dev/null +++ b/tests/test_data/outputs/search/G5/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MY*TPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G5/run.json b/tests/test_data/outputs/search/G5/run.json new file mode 100755 index 0000000..a639e7d --- /dev/null +++ b/tests/test_data/outputs/search/G5/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:11:57", + "parameters": { + "query": "locidex/extract/G5/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G5", + "name": "G5", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G5/seq_store.json", + "analysis_end_time": "10/06/2024 11:11:59" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G5/seq_store.json 
b/tests/test_data/outputs/search/G5/seq_store.json new file mode 100755 index 0000000..fbd2b3d --- /dev/null +++ b/tests/test_data/outputs/search/G5/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G5", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "e9e707ebc64e10a881f1323ebff85369", + "dna_len": 102, + "aa_hash": "daa2576d97f92c0fe9161c023757d495", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 1, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": "736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 103, + "mismatch": 1, + "pident": 97.087, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.81e-46, + "bitscore": 172 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 2, + "pident": 94.118, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.04e-17, + "bitscore": 57.8 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + 
"bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 
488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "11": { + "nucleotide": [ + 
{ + "qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + 
"mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + "pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + 
], + "protein": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, 
+ "locus_7": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..8e411cd --- /dev/null +++ b/tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 2 98.039 100 100 plus 3.90e-48 178 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 
1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..26e0ed8 --- /dev/null +++ b/tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G6/blast/protein/hsps.txt b/tests/test_data/outputs/search/G6/blast/protein/hsps.txt new file mode 100755 index 0000000..f47c1c9 --- /dev/null +++ b/tests/test_data/outputs/search/G6/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 1 97.059 100 100 N/A 1.28e-18 60.8 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G6/blast/protein/queries.fasta b/tests/test_data/outputs/search/G6/blast/protein/queries.fasta new file mode 100755 index 0000000..9f022f4 --- /dev/null +++ b/tests/test_data/outputs/search/G6/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITAS*FAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G6/run.json b/tests/test_data/outputs/search/G6/run.json new file mode 100755 index 0000000..05fbed0 --- /dev/null +++ b/tests/test_data/outputs/search/G6/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:03", + "parameters": { + "query": "locidex/extract/G6/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G6", + "name": "G6", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G6/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:06" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G6/seq_store.json 
b/tests/test_data/outputs/search/G6/seq_store.json new file mode 100755 index 0000000..29546b1 --- /dev/null +++ b/tests/test_data/outputs/search/G6/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G6", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "a47cc24760462371e919143c5cc81376", + "dna_len": 102, + "aa_hash": "d65fe5c591a0b644f991adbc1b300a75", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 1, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "dna_len": 762, + "aa_hash": "988bf512f0362e276b0e5622fbaa7079", + "aa_len": 254, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "5b128d659955716833ce42f2bb060212", + "dna_len": 858, + "aa_hash": "d6a46f107d0604f27820147b523948c8", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 2, + "pident": 98.039, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 3.8999999999999997e-48, + "bitscore": 178 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 1, + "pident": 97.059, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.28e-18, + "bitscore": 60.8 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + 
"slen": 762, + "qstart": 1, + "qend": 762, + "sstart": 1, + "send": 762, + "length": 762, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1408 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 1, + "qend": 254, + "sstart": 1, + "send": 254, + "length": 254, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 514.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1585 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 579.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 
0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 
488, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "11": { + "nucleotide": [ + { + 
"qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + 
"pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 17, 
+ "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_7": { + "nucleotide": [ + "17" 
+ ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..61d17b6 --- /dev/null +++ b/tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 3 762 3 762 760 0 100.000 99 99 plus 0.0 1404 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 
+7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..b762e62 --- /dev/null +++ b/tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G7/blast/protein/hsps.txt b/tests/test_data/outputs/search/G7/blast/protein/hsps.txt new file mode 100755 index 0000000..194fd46 --- /dev/null +++ b/tests/test_data/outputs/search/G7/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 2 254 2 254 253 0 100.000 99 99 N/A 0.0 512 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G7/blast/protein/queries.fasta b/tests/test_data/outputs/search/G7/blast/protein/queries.fasta new file mode 100755 index 0000000..cad8adb --- /dev/null +++ b/tests/test_data/outputs/search/G7/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+GRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G7/run.json b/tests/test_data/outputs/search/G7/run.json new file mode 100755 index 0000000..a402ede --- /dev/null +++ b/tests/test_data/outputs/search/G7/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:09", + "parameters": { + "query": "locidex/extract/G7/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G7", + "name": "G7", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G7/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:11" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G7/seq_store.json 
b/tests/test_data/outputs/search/G7/seq_store.json new file mode 100755 index 0000000..8416754 --- /dev/null +++ b/tests/test_data/outputs/search/G7/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G7", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "796419469778f7ec3851c813f59cfff7", + "dna_len": 762, + "aa_hash": "7cf5ac5873242fd0899f2ce5f93d01c3", + "aa_len": 254, + "start_codon": "ggg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": "736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 3, + "qend": 762, + "sstart": 3, + "send": 762, + "length": 760, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 99, + "qcovs": 99, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1404 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 2, + "qend": 254, + "sstart": 2, + "send": 254, + "length": 253, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 99, + "qcovs": 99, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 512.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + 
"bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 
488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "11": { + "nucleotide": [ + 
{ + "qseqid": 11, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + 
"mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + "pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + 
], + "protein": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, 
+ "locus_7": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..ea86f3a --- /dev/null +++ b/tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 4 762 4 762 759 0 100.000 99 99 plus 0.0 1402 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 
1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..232dea5 --- /dev/null +++ b/tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G8/blast/protein/hsps.txt b/tests/test_data/outputs/search/G8/blast/protein/hsps.txt new file mode 100755 index 0000000..4b86eab --- /dev/null +++ b/tests/test_data/outputs/search/G8/blast/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 2 254 2 254 253 0 100.000 99 99 N/A 0.0 512 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G8/blast/protein/queries.fasta b/tests/test_data/outputs/search/G8/blast/protein/queries.fasta new file mode 100755 index 0000000..cb18c87 --- /dev/null +++ b/tests/test_data/outputs/search/G8/blast/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+KRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G8/run.json b/tests/test_data/outputs/search/G8/run.json new file mode 100755 index 0000000..4c0f386 --- /dev/null +++ b/tests/test_data/outputs/search/G8/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:15", + "parameters": { + "query": "locidex/extract/G8/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G8", + "name": "G8", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G8/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:17" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G8/seq_store.json 
b/tests/test_data/outputs/search/G8/seq_store.json new file mode 100755 index 0000000..9476613 --- /dev/null +++ b/tests/test_data/outputs/search/G8/seq_store.json @@ -0,0 +1,1744 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": 
"4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + "locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": 
"fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, + "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + 
"count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": "murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": 
"a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 
3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G8", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_10:9:0:1", + "locus_name": "locus_10:9:0:1", + "seq_id": "locus_10:9:0:1", + "dna_hash": "fd6284b58a891cf02058906c9ee37a00", + "dna_len": 762, + "aa_hash": "443ddee4a99bfc9bbbab56d103f7d81d", + "aa_len": 254, + "start_codon": "aaa", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_11:10:0:2", + "locus_name": "locus_11:10:0:2", + "seq_id": "locus_11:10:0:2", + "dna_hash": "5b128d659955716833ce42f2bb060212", + "dna_len": 858, + "aa_hash": "d6a46f107d0604f27820147b523948c8", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + "parent_id": "locus_12:11:0:3", + "locus_name": "locus_12:11:0:3", + "seq_id": "locus_12:11:0:3", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_13:12:0:4", + "locus_name": "locus_13:12:0:4", + "seq_id": "locus_13:12:0:4", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + 
"dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_14:13:0:5", + "locus_name": "locus_14:13:0:5", + "seq_id": "locus_14:13:0:5", + "dna_hash": "b9060019038526aa6fc38d2f7510edc6", + "dna_len": 1281, + "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_15:14:0:6", + "locus_name": "locus_15:14:0:6", + "seq_id": "locus_15:14:0:6", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_16:15:0:7", + "locus_name": "locus_16:15:0:7", + "seq_id": "locus_16:15:0:7", + "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", + "dna_len": 1464, + "aa_hash": "f85b3701f5642454bf4d2263feb13354", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_17:16:0:8", + "locus_name": "locus_17:16:0:8", + "seq_id": "locus_17:16:0:8", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_18:17:0:9", + "locus_name": "locus_18:17:0:9", + "seq_id": "locus_18:17:0:9", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_19:18:0:10", + "locus_name": "locus_19:18:0:10", + "seq_id": "locus_19:18:0:10", + "dna_hash": "a012eee23637b48e39b00808a057e35d", + "dna_len": 2037, + "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", + 
"aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_2:1:0:11", + "locus_name": "locus_2:1:0:11", + "seq_id": "locus_2:1:0:11", + "dna_hash": "e35184c8ff18e9116fc8faef20532f56", + "dna_len": 285, + "aa_hash": "2a1a77c25ad681437705d9145aef608c", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_20:19:0:12", + "locus_name": "locus_20:19:0:12", + "seq_id": "locus_20:19:0:12", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_3:2:0:13", + "locus_name": "locus_3:2:0:13", + "seq_id": "locus_3:2:0:13", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_4:3:0:14", + "locus_name": "locus_4:3:0:14", + "seq_id": "locus_4:3:0:14", + "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", + "dna_len": 417, + "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_5:4:0:15", + "locus_name": "locus_5:4:0:15", + "seq_id": "locus_5:4:0:15", + "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", + "dna_len": 444, + "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_6:5:0:16", + "locus_name": "locus_6:5:0:16", + "seq_id": "locus_6:5:0:16", + "dna_hash": 
"a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_7:6:0:17", + "locus_name": "locus_7:6:0:17", + "seq_id": "locus_7:6:0:17", + "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "dna_len": 606, + "aa_hash": "da78b534d889d8f35bec304ef54f1b93", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_8:7:0:18", + "locus_name": "locus_8:7:0:18", + "seq_id": "locus_8:7:0:18", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "19": { + "parent_id": "locus_9:8:0:19", + "locus_name": "locus_9:8:0:19", + "seq_id": "locus_9:8:0:19", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + "sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 762, + "slen": 762, + 
"qstart": 4, + "qend": 762, + "sstart": 4, + "send": 762, + "length": 759, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 99, + "qcovs": 99, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1402 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 9, + "qlen": 254, + "slen": 254, + "qstart": 2, + "qend": 254, + "sstart": 2, + "send": 254, + "length": 253, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 99, + "qcovs": 99, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 512.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1585 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 579.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 
2028 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2366 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 869.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2704 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 
0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1004.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3762 + } + ], + "protein": [ + { + "qseqid": 10, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1419.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + 
"sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.03e-152, + "bitscore": 527 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.2299999999999998e-68, + "bitscore": 191.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 0, + "pident": 
100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 771 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.34e-106, + "bitscore": 290.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 821 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.0400000000000001e-110, + "bitscore": 302.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "17": { + "nucleotide": [ + { + "qseqid": 17, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1120 + } + ], + "protein": [ + { + "qseqid": 17, + 
"sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.71e-154, + "bitscore": 416.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "19": { + "nucleotide": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 19, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + }, + "locus_3": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_4": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_5": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_6": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_7": { + "nucleotide": [ + "17" + 
], + "protein": [ + "17" + ] + }, + "locus_8": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_9": { + "nucleotide": [ + "19" + ], + "protein": [ + "19" + ] + }, + "locus_10": { + "nucleotide": [ + "1" + ], + "protein": [ + "1" + ] + }, + "locus_11": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_12": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_13": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_14": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_15": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_16": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_17": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_18": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_19": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_20": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt new file mode 100755 index 0000000..6654f25 --- /dev/null +++ b/tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt @@ -0,0 +1,19 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +2 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +3 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +4 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +5 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +6 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 
2621 +7 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +8 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +9 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +10 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +11 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +12 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +13 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +14 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +15 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +16 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +17 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +18 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta new file mode 100755 index 0000000..c89bc4d --- /dev/null +++ b/tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta @@ -0,0 +1,38 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>2 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>3 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>4 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>13 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>14 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G9/blast/protein/hsps.txt b/tests/test_data/outputs/search/G9/blast/protein/hsps.txt new file mode 100755 index 0000000..4a5a697 --- /dev/null +++ b/tests/test_data/outputs/search/G9/blast/protein/hsps.txt @@ -0,0 +1,19 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +2 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +3 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +4 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +5 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +6 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +7 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +8 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +9 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +10 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +11 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +12 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +13 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +14 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +15 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +16 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +17 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +18 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G9/blast/protein/queries.fasta b/tests/test_data/outputs/search/G9/blast/protein/queries.fasta new file mode 100755 index 0000000..26b5df4 --- /dev/null +++ b/tests/test_data/outputs/search/G9/blast/protein/queries.fasta @@ -0,0 +1,38 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>2 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>3 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>4 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>5 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>6 
+MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>7 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>8 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>9 
+MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>10 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>11 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKR
RAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>12 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>13 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>14 +MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>15 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>16 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>17 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>18 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G9/run.json b/tests/test_data/outputs/search/G9/run.json new file mode 100755 index 0000000..fae40d0 --- /dev/null +++ b/tests/test_data/outputs/search/G9/run.json @@ -0,0 +1,27 @@ +{ + "analysis_start_time": "10/06/2024 11:12:21", + "parameters": { 
+ "query": "locidex/extract/G9/raw.extracted.seqs.fasta", + "outdir": "locidex/search/G9", + "name": "G9", + "db": "locidex/db", + "config": null, + "min_evalue": 0.0001, + "min_dna_len": 1, + "min_aa_len": 1, + "max_dna_len": 10000000, + "max_aa_len": 10000000, + "min_dna_ident": 80.0, + "min_aa_ident": 80.0, + "min_dna_match_cov": 80.0, + "min_aa_match_cov": 80.0, + "max_target_seqs": 10, + "n_threads": 8, + "format": null, + "translation_table": 11, + "annotate": false, + "force": true + }, + "result_file": "locidex/search/G9/seq_store.json", + "analysis_end_time": "10/06/2024 11:12:23" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G9/seq_store.json b/tests/test_data/outputs/search/G9/seq_store.json new file mode 100755 index 0000000..0b7e531 --- /dev/null +++ b/tests/test_data/outputs/search/G9/seq_store.json @@ -0,0 +1,1685 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "db_seq_info": { + "0": { + "seq_id": 0, + "locus_name": "locus_1", + "locus_name_alt": "SALM_11273", + "locus_product": "!", + "locus_description": "hypothetical protein", + "locus_uid": "1", + "dna_seq_len": 102, + "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "aa_seq_len": 34, + "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", + "dna_min_len": 71.4, + "dna_max_len": 132.6, + "aa_min_len": 23.8, + "aa_max_len": 44.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "1": { + "seq_id": 1, + "locus_name": "locus_2", + "locus_name_alt": "SALM_120", + "locus_product": "@", + "locus_description": "outer membrane protein", + "locus_uid": "B", + "dna_seq_len": 285, + 
"dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", + "aa_seq_len": 95, + "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", + "dna_min_len": 199.5, + "dna_max_len": 370.5, + "aa_min_len": 66.5, + "aa_max_len": 123.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "2": { + "seq_id": 2, + "locus_name": "locus_3", + "locus_name_alt": "SALM_2016", + "locus_product": "#", + "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", + "locus_uid": "C", + "dna_seq_len": 327, + "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", + "aa_seq_len": 109, + "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", + "dna_min_len": 228.9, + "dna_max_len": 425.1, + "aa_min_len": 76.3, + "aa_max_len": 141.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "3": { + "seq_id": 3, + "locus_name": "locus_4", + "locus_name_alt": "SALM_8644", + "locus_product": "$", + "locus_description": "AZ624_004720", + "locus_uid": "AZ624_004720", + "dna_seq_len": 417, + "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", + "aa_seq_len": 139, + "aa_seq_hash": "0c25367401155278f34832f184ab44e6", + "dna_min_len": 291.9, + "dna_max_len": 542.1, + "aa_min_len": 97.3, + "aa_max_len": 180.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "4": { + "seq_id": 4, + "locus_name": "locus_5", + "locus_name_alt": "SALM_1876", + "locus_product": "%", + "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", + "locus_uid": "E", + "dna_seq_len": 444, + "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", + "aa_seq_len": 148, + "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", + "dna_min_len": 310.8, + "dna_max_len": 577.2, + "aa_min_len": 103.6, + "aa_max_len": 192.4, + 
"dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "5": { + "seq_id": 5, + "locus_name": "locus_6", + "locus_name_alt": "SALM_640", + "locus_product": "^", + "locus_description": "MOSC domain-containing protein", + "locus_uid": "F", + "dna_seq_len": 543, + "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", + "aa_seq_len": 181, + "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", + "dna_min_len": 380.1, + "dna_max_len": 705.9, + "aa_min_len": 126.7, + "aa_max_len": 235.3, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "6": { + "seq_id": 6, + "locus_name": "locus_7", + "locus_name_alt": "SALM_1501", + "locus_product": "&", + "locus_description": "India: Vellore", + "locus_uid": "G", + "dna_seq_len": 606, + "dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", + "aa_seq_len": 202, + "aa_seq_hash": "62252b3326997117f127efb88ff09294", + "dna_min_len": 424.2, + "dna_max_len": 787.8, + "aa_min_len": 141.4, + "aa_max_len": 262.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "7": { + "seq_id": 7, + "locus_name": "locus_8", + "locus_name_alt": "SALM_756", + "locus_product": "*", + "locus_description": "DNA polymerase III subunit delta'", + "locus_uid": "H", + "dna_seq_len": 642, + "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", + "aa_seq_len": 214, + "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", + "dna_min_len": 449.4, + "dna_max_len": 834.6, + "aa_min_len": 149.8, + "aa_max_len": 278.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "8": { + "seq_id": 8, + "locus_name": "locus_9", + "locus_name_alt": "SALM_7353", + "locus_product": "1", + 
"locus_description": "fimbrial assembly chaperone", + "locus_uid": "I", + "dna_seq_len": 684, + "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "aa_seq_len": 228, + "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", + "dna_min_len": 478.8, + "dna_max_len": 889.2, + "aa_min_len": 159.6, + "aa_max_len": 296.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "9": { + "seq_id": 9, + "locus_name": "locus_10", + "locus_name_alt": "SALM_1891", + "locus_product": "200.96", + "locus_description": "5'/3'-nucleotidase SurE", + "locus_uid": "J", + "dna_seq_len": 762, + "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", + "aa_seq_len": 254, + "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", + "dna_min_len": 533.4, + "dna_max_len": 990.6, + "aa_min_len": 177.8, + "aa_max_len": 330.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "10": { + "seq_id": 10, + "locus_name": "locus_11", + "locus_name_alt": "SALM_1452", + "locus_product": "|", + "locus_description": "1-phosphofructokinase", + "locus_uid": "K", + "dna_seq_len": 858, + "dna_seq_hash": "5b128d659955716833ce42f2bb060212", + "aa_seq_len": 286, + "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", + "dna_min_len": 600.6, + "dna_max_len": 1115.4, + "aa_min_len": 200.2, + "aa_max_len": 371.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "11": { + "seq_id": 11, + "locus_name": "locus_12", + "locus_name_alt": "SALM_11020", + "locus_product": "_", + "locus_description": "1", + "locus_uid": "L", + "dna_seq_len": 972, + "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "aa_seq_len": 324, + "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", + "dna_min_len": 680.4, + "dna_max_len": 1263.6, + "aa_min_len": 226.8, 
+ "aa_max_len": 421.2, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "12": { + "seq_id": 12, + "locus_name": "locus_13", + "locus_name_alt": "SALM_1934", + "locus_product": "-", + "locus_description": "3.9", + "locus_uid": "M", + "dna_seq_len": 1098, + "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", + "aa_seq_len": 366, + "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", + "dna_min_len": 768.6, + "dna_max_len": 1427.4, + "aa_min_len": 256.2, + "aa_max_len": 475.8, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "13": { + "seq_id": 13, + "locus_name": "locus_14", + "locus_name_alt": "SALM_2871", + "locus_product": "+", + "locus_description": "@", + "locus_uid": "N", + "dna_seq_len": 1281, + "dna_seq_hash": "b9060019038526aa6fc38d2f7510edc6", + "aa_seq_len": 427, + "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", + "dna_min_len": 896.7, + "dna_max_len": 1665.3, + "aa_min_len": 298.9, + "aa_max_len": 555.1, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "14": { + "seq_id": 14, + "locus_name": "locus_15", + "locus_name_alt": "SALM_583", + "locus_product": "=", + "locus_description": "DMT family transporter", + "locus_uid": "O", + "dna_seq_len": 1434, + "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", + "aa_seq_len": 478, + "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", + "dna_min_len": 1003.8, + "dna_max_len": 1864.2, + "aa_min_len": 334.6, + "aa_max_len": 621.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "15": { + "seq_id": 15, + "locus_name": "locus_16", + "locus_name_alt": "SALM_780", + "locus_product": "<", + "locus_description": 
"murein transglycosylase A", + "locus_uid": "P", + "dna_seq_len": 1464, + "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", + "aa_seq_len": 488, + "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", + "dna_min_len": 1024.8, + "dna_max_len": 1903.2, + "aa_min_len": 341.6, + "aa_max_len": 634.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "16": { + "seq_id": 16, + "locus_name": "locus_17", + "locus_name_alt": "SALM_1937", + "locus_product": ">", + "locus_description": "GTPase HflX", + "locus_uid": "Q", + "dna_seq_len": 1836, + "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "aa_seq_len": 612, + "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", + "dna_min_len": 1285.2, + "dna_max_len": 2386.8, + "aa_min_len": 428.4, + "aa_max_len": 795.6, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "17": { + "seq_id": 17, + "locus_name": "locus_18", + "locus_name_alt": "SALM_1997", + "locus_product": "?", + "locus_description": "biosynthetic arginine decarboxylase", + "locus_uid": "R", + "dna_seq_len": 1914, + "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", + "aa_seq_len": 638, + "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", + "dna_min_len": 1339.8, + "dna_max_len": 2488.2, + "aa_min_len": 446.6, + "aa_max_len": 829.4, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "18": { + "seq_id": 18, + "locus_name": "locus_19", + "locus_name_alt": "SALM_9926", + "locus_product": ",", + "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", + "locus_uid": "S", + "dna_seq_len": 2037, + "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", + "aa_seq_len": 679, + 
"aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", + "dna_min_len": 1425.9, + "dna_max_len": 2648.1, + "aa_min_len": 475.3, + "aa_max_len": 882.7, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + }, + "19": { + "seq_id": 19, + "locus_name": "locus_20", + "locus_name_alt": "SALM_6064", + "locus_product": ".", + "locus_description": "alpha-2-macroglobulin family protein", + "locus_uid": "T", + "dna_seq_len": 4935, + "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", + "aa_seq_len": 1645, + "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", + "dna_min_len": 3454.5, + "dna_max_len": 6415.5, + "aa_min_len": 1151.5, + "aa_max_len": 2138.5, + "dna_min_ident": 80, + "aa_min_ident": 64, + "min_dna_match_cov": 80, + "min_aa_match_cov": 80, + "count_int_stops": 0, + "dna_ambig_count": 0 + } + }, + "query_data": { + "sample_name": "G9", + "query_seq_data": { + "0": { + "parent_id": "locus_1:0:0:0", + "locus_name": "locus_1:0:0:0", + "seq_id": "locus_1:0:0:0", + "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", + "dna_len": 102, + "aa_hash": "a931d1f75114576e60538364eb01a05f", + "aa_len": 34, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "1": { + "parent_id": "locus_11:10:0:1", + "locus_name": "locus_11:10:0:1", + "seq_id": "locus_11:10:0:1", + "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", + "dna_len": 858, + "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", + "aa_len": 286, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "2": { + "parent_id": "locus_12:11:0:2", + "locus_name": "locus_12:11:0:2", + "seq_id": "locus_12:11:0:2", + "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", + "dna_len": 972, + "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", + "aa_len": 324, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "3": { + 
"parent_id": "locus_13:12:0:3", + "locus_name": "locus_13:12:0:3", + "seq_id": "locus_13:12:0:3", + "dna_hash": "8f300259dcb46224bdc1fe5273107324", + "dna_len": 1098, + "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", + "aa_len": 366, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "4": { + "parent_id": "locus_14:13:0:4", + "locus_name": "locus_14:13:0:4", + "seq_id": "locus_14:13:0:4", + "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "dna_len": 1281, + "aa_hash": "bf5190f310477277da454725d434a8ee", + "aa_len": 427, + "start_codon": "ttg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "5": { + "parent_id": "locus_15:14:0:5", + "locus_name": "locus_15:14:0:5", + "seq_id": "locus_15:14:0:5", + "dna_hash": "bc98c2fe196a68a79036814396513a8d", + "dna_len": 1434, + "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", + "aa_len": 478, + "start_codon": "gtg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "6": { + "parent_id": "locus_16:15:0:6", + "locus_name": "locus_16:15:0:6", + "seq_id": "locus_16:15:0:6", + "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", + "dna_len": 1464, + "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", + "aa_len": 488, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "7": { + "parent_id": "locus_17:16:0:7", + "locus_name": "locus_17:16:0:7", + "seq_id": "locus_17:16:0:7", + "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", + "dna_len": 1836, + "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", + "aa_len": 612, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "8": { + "parent_id": "locus_18:17:0:8", + "locus_name": "locus_18:17:0:8", + "seq_id": "locus_18:17:0:8", + "dna_hash": "b3021e979faa7600756c06dfadfcf14c", + "dna_len": 1914, + "aa_hash": "42c4a831ee79a27c47138fe96829814b", + "aa_len": 638, + "start_codon": "atg", + 
"stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "9": { + "parent_id": "locus_19:18:0:9", + "locus_name": "locus_19:18:0:9", + "seq_id": "locus_19:18:0:9", + "dna_hash": "de32372598811d63bcc1a0eaf6872644", + "dna_len": 2037, + "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", + "aa_len": 679, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "10": { + "parent_id": "locus_2:1:0:10", + "locus_name": "locus_2:1:0:10", + "seq_id": "locus_2:1:0:10", + "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", + "dna_len": 285, + "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", + "aa_len": 95, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "11": { + "parent_id": "locus_20:19:0:11", + "locus_name": "locus_20:19:0:11", + "seq_id": "locus_20:19:0:11", + "dna_hash": "4461918e985715e4a2b07494e1f91326", + "dna_len": 4935, + "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", + "aa_len": 1645, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "12": { + "parent_id": "locus_3:2:0:12", + "locus_name": "locus_3:2:0:12", + "seq_id": "locus_3:2:0:12", + "dna_hash": "670705cd2a59c4a23a897ac656a888fe", + "dna_len": 327, + "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", + "aa_len": 109, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "13": { + "parent_id": "locus_4:3:0:13", + "locus_name": "locus_4:3:0:13", + "seq_id": "locus_4:3:0:13", + "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", + "dna_len": 417, + "aa_hash": "77784601d754a5f36152853592023b08", + "aa_len": 139, + "start_codon": "ctg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "14": { + "parent_id": "locus_5:4:0:14", + "locus_name": "locus_5:4:0:14", + "seq_id": "locus_5:4:0:14", + "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", + "dna_len": 444, + "aa_hash": 
"736cc3184dda2c5ac596f76753272622", + "aa_len": 148, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "15": { + "parent_id": "locus_6:5:0:15", + "locus_name": "locus_6:5:0:15", + "seq_id": "locus_6:5:0:15", + "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", + "dna_len": 543, + "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", + "aa_len": 181, + "start_codon": "atg", + "stop_codon": "taa", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "16": { + "parent_id": "locus_7:6:0:16", + "locus_name": "locus_7:6:0:16", + "seq_id": "locus_7:6:0:16", + "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", + "dna_len": 606, + "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", + "aa_len": 202, + "start_codon": "gtg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "17": { + "parent_id": "locus_8:7:0:17", + "locus_name": "locus_8:7:0:17", + "seq_id": "locus_8:7:0:17", + "dna_hash": "7ebe74afecf146ec4db816c8deced64f", + "dna_len": 642, + "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", + "aa_len": 214, + "start_codon": "atg", + "stop_codon": "tga", + "count_internal_stop": 0, + "dna_ambig_count": 0 + }, + "18": { + "parent_id": "locus_9:8:0:18", + "locus_name": "locus_9:8:0:18", + "seq_id": "locus_9:8:0:18", + "dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", + "dna_len": 684, + "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", + "aa_len": 228, + "start_codon": "atg", + "stop_codon": "tag", + "count_internal_stop": 0, + "dna_ambig_count": 0 + } + }, + "query_hit_columns": [], + "query_hits": { + "0": { + "nucleotide": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 102, + "slen": 102, + "qstart": 1, + "qend": 102, + "sstart": 1, + "send": 102, + "length": 102, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 1.8e-51, + "bitscore": 189 + } + ], + "protein": [ + { + "qseqid": 0, + "sseqid": 0, + "qlen": 34, + "slen": 34, + "qstart": 1, + "qend": 34, + 
"sstart": 1, + "send": 34, + "length": 34, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 4.94e-20, + "bitscore": 64.3 + } + ] + }, + "1": { + "nucleotide": [ + { + "qseqid": 1, + "sseqid": 10, + "qlen": 858, + "slen": 858, + "qstart": 1, + "qend": 858, + "sstart": 1, + "send": 858, + "length": 858, + "mismatch": 19, + "pident": 97.786, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1480 + } + ], + "protein": [ + { + "qseqid": 1, + "sseqid": 10, + "qlen": 286, + "slen": 286, + "qstart": 1, + "qend": 286, + "sstart": 1, + "send": 286, + "length": 286, + "mismatch": 17, + "pident": 94.056, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 518.0 + } + ] + }, + "2": { + "nucleotide": [ + { + "qseqid": 2, + "sseqid": 11, + "qlen": 972, + "slen": 972, + "qstart": 1, + "qend": 972, + "sstart": 1, + "send": 972, + "length": 972, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1796 + } + ], + "protein": [ + { + "qseqid": 2, + "sseqid": 11, + "qlen": 324, + "slen": 324, + "qstart": 1, + "qend": 324, + "sstart": 1, + "send": 324, + "length": 324, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 640.0 + } + ] + }, + "3": { + "nucleotide": [ + { + "qseqid": 3, + "sseqid": 12, + "qlen": 1098, + "slen": 1098, + "qstart": 1, + "qend": 1098, + "sstart": 1, + "send": 1098, + "length": 1098, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2028 + } + ], + "protein": [ + { + "qseqid": 3, + "sseqid": 12, + "qlen": 366, + "slen": 366, + "qstart": 1, + "qend": 366, + "sstart": 1, + "send": 366, + "length": 366, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 754.0 + } + ] + }, + 
"4": { + "nucleotide": [ + { + "qseqid": 4, + "sseqid": 13, + "qlen": 1281, + "slen": 1281, + "qstart": 1, + "qend": 1281, + "sstart": 1, + "send": 1281, + "length": 1281, + "mismatch": 11, + "pident": 99.141, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2305 + } + ], + "protein": [ + { + "qseqid": 4, + "sseqid": 13, + "qlen": 427, + "slen": 427, + "qstart": 1, + "qend": 427, + "sstart": 1, + "send": 427, + "length": 427, + "mismatch": 9, + "pident": 97.892, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 846.0 + } + ] + }, + "5": { + "nucleotide": [ + { + "qseqid": 5, + "sseqid": 14, + "qlen": 1434, + "slen": 1434, + "qstart": 1, + "qend": 1434, + "sstart": 1, + "send": 1434, + "length": 1434, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2649 + } + ], + "protein": [ + { + "qseqid": 5, + "sseqid": 14, + "qlen": 478, + "slen": 478, + "qstart": 1, + "qend": 478, + "sstart": 1, + "send": 478, + "length": 478, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "6": { + "nucleotide": [ + { + "qseqid": 6, + "sseqid": 15, + "qlen": 1464, + "slen": 1464, + "qstart": 1, + "qend": 1464, + "sstart": 1, + "send": 1464, + "length": 1464, + "mismatch": 15, + "pident": 98.975, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 2621 + } + ], + "protein": [ + { + "qseqid": 6, + "sseqid": 15, + "qlen": 488, + "slen": 488, + "qstart": 1, + "qend": 488, + "sstart": 1, + "send": 488, + "length": 488, + "mismatch": 14, + "pident": 97.131, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 972.0 + } + ] + }, + "7": { + "nucleotide": [ + { + "qseqid": 7, + "sseqid": 16, + "qlen": 1836, + "slen": 1836, + "qstart": 1, + "qend": 1836, + "sstart": 1, + "send": 1836, + "length": 1836, + 
"mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3391 + } + ], + "protein": [ + { + "qseqid": 7, + "sseqid": 16, + "qlen": 612, + "slen": 612, + "qstart": 1, + "qend": 612, + "sstart": 1, + "send": 612, + "length": 612, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1241.0 + } + ] + }, + "8": { + "nucleotide": [ + { + "qseqid": 8, + "sseqid": 17, + "qlen": 1914, + "slen": 1914, + "qstart": 1, + "qend": 1914, + "sstart": 1, + "send": 1914, + "length": 1914, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3535 + } + ], + "protein": [ + { + "qseqid": 8, + "sseqid": 17, + "qlen": 638, + "slen": 638, + "qstart": 1, + "qend": 638, + "sstart": 1, + "send": 638, + "length": 638, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1326.0 + } + ] + }, + "9": { + "nucleotide": [ + { + "qseqid": 9, + "sseqid": 18, + "qlen": 2037, + "slen": 2037, + "qstart": 1, + "qend": 2037, + "sstart": 1, + "send": 2037, + "length": 2037, + "mismatch": 16, + "pident": 99.215, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 3674 + } + ], + "protein": [ + { + "qseqid": 9, + "sseqid": 18, + "qlen": 679, + "slen": 679, + "qstart": 1, + "qend": 679, + "sstart": 1, + "send": 679, + "length": 679, + "mismatch": 8, + "pident": 98.822, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 1403.0 + } + ] + }, + "10": { + "nucleotide": [ + { + "qseqid": 10, + "sseqid": 1, + "qlen": 285, + "slen": 285, + "qstart": 1, + "qend": 285, + "sstart": 1, + "send": 285, + "length": 285, + "mismatch": 17, + "pident": 94.035, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 2.3100000000000004e-124, + "bitscore": 433 + } + ], + "protein": [ + { + 
"qseqid": 10, + "sseqid": 1, + "qlen": 95, + "slen": 95, + "qstart": 1, + "qend": 95, + "sstart": 1, + "send": 95, + "length": 95, + "mismatch": 11, + "pident": 88.421, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.89e-51, + "bitscore": 147.0 + } + ] + }, + "11": { + "nucleotide": [ + { + "qseqid": 11, + "sseqid": 19, + "qlen": 4935, + "slen": 4935, + "qstart": 1, + "qend": 4935, + "sstart": 1, + "send": 4935, + "length": 4935, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 9114 + } + ], + "protein": [ + { + "qseqid": 11, + "sseqid": 19, + "qlen": 1645, + "slen": 1645, + "qstart": 1, + "qend": 1645, + "sstart": 1, + "send": 1645, + "length": 1645, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 0.0, + "bitscore": 3332.0 + } + ] + }, + "12": { + "nucleotide": [ + { + "qseqid": 12, + "sseqid": 2, + "qlen": 327, + "slen": 327, + "qstart": 1, + "qend": 327, + "sstart": 1, + "send": 327, + "length": 327, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 5.34e-176, + "bitscore": 604 + } + ], + "protein": [ + { + "qseqid": 12, + "sseqid": 2, + "qlen": 109, + "slen": 109, + "qstart": 1, + "qend": 109, + "sstart": 1, + "send": 109, + "length": 109, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.09e-81, + "bitscore": 224.0 + } + ] + }, + "13": { + "nucleotide": [ + { + "qseqid": 13, + "sseqid": 3, + "qlen": 417, + "slen": 417, + "qstart": 1, + "qend": 417, + "sstart": 1, + "send": 417, + "length": 417, + "mismatch": 11, + "pident": 97.362, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 710 + } + ], + "protein": [ + { + "qseqid": 13, + "sseqid": 3, + "qlen": 139, + "slen": 139, + "qstart": 1, + "qend": 139, + "sstart": 1, + "send": 139, + "length": 139, + "mismatch": 9, + "pident": 
93.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 3.6999999999999996e-98, + "bitscore": 269.0 + } + ] + }, + "14": { + "nucleotide": [ + { + "qseqid": 14, + "sseqid": 4, + "qlen": 444, + "slen": 444, + "qstart": 1, + "qend": 444, + "sstart": 1, + "send": 444, + "length": 444, + "mismatch": 15, + "pident": 96.622, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 737 + } + ], + "protein": [ + { + "qseqid": 14, + "sseqid": 4, + "qlen": 148, + "slen": 148, + "qstart": 1, + "qend": 148, + "sstart": 1, + "send": 148, + "length": 148, + "mismatch": 11, + "pident": 92.568, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.5600000000000001e-97, + "bitscore": 268.0 + } + ] + }, + "15": { + "nucleotide": [ + { + "qseqid": 15, + "sseqid": 5, + "qlen": 543, + "slen": 543, + "qstart": 1, + "qend": 543, + "sstart": 1, + "send": 543, + "length": 543, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1003 + } + ], + "protein": [ + { + "qseqid": 15, + "sseqid": 5, + "qlen": 181, + "slen": 181, + "qstart": 1, + "qend": 181, + "sstart": 1, + "send": 181, + "length": 181, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.7899999999999999e-140, + "bitscore": 380.0 + } + ] + }, + "16": { + "nucleotide": [ + { + "qseqid": 16, + "sseqid": 6, + "qlen": 606, + "slen": 606, + "qstart": 1, + "qend": 606, + "sstart": 1, + "send": 606, + "length": 606, + "mismatch": 15, + "pident": 97.525, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1037 + } + ], + "protein": [ + { + "qseqid": 16, + "sseqid": 6, + "qlen": 202, + "slen": 202, + "qstart": 1, + "qend": 202, + "sstart": 1, + "send": 202, + "length": 202, + "mismatch": 14, + "pident": 93.069, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 2.36e-141, + "bitscore": 384.0 + } + ] + }, + "17": { + 
"nucleotide": [ + { + "qseqid": 17, + "sseqid": 7, + "qlen": 642, + "slen": 642, + "qstart": 1, + "qend": 642, + "sstart": 1, + "send": 642, + "length": 642, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1186 + } + ], + "protein": [ + { + "qseqid": 17, + "sseqid": 7, + "qlen": 214, + "slen": 214, + "qstart": 1, + "qend": 214, + "sstart": 1, + "send": 214, + "length": 214, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 1.75e-157, + "bitscore": 426.0 + } + ] + }, + "18": { + "nucleotide": [ + { + "qseqid": 18, + "sseqid": 8, + "qlen": 684, + "slen": 684, + "qstart": 1, + "qend": 684, + "sstart": 1, + "send": 684, + "length": 684, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": "plus", + "evalue": 0.0, + "bitscore": 1264 + } + ], + "protein": [ + { + "qseqid": 18, + "sseqid": 8, + "qlen": 228, + "slen": 228, + "qstart": 1, + "qend": 228, + "sstart": 1, + "send": 228, + "length": 228, + "mismatch": 0, + "pident": 100.0, + "qcovhsp": 100, + "qcovs": 100, + "sstrand": NaN, + "evalue": 6.32e-172, + "bitscore": 463.0 + } + ] + } + }, + "locus_profile": { + "locus_1": { + "nucleotide": [ + "0" + ], + "protein": [ + "0" + ] + }, + "locus_2": { + "nucleotide": [ + "10" + ], + "protein": [ + "10" + ] + }, + "locus_3": { + "nucleotide": [ + "12" + ], + "protein": [ + "12" + ] + }, + "locus_4": { + "nucleotide": [ + "13" + ], + "protein": [ + "13" + ] + }, + "locus_5": { + "nucleotide": [ + "14" + ], + "protein": [ + "14" + ] + }, + "locus_6": { + "nucleotide": [ + "15" + ], + "protein": [ + "15" + ] + }, + "locus_7": { + "nucleotide": [ + "16" + ], + "protein": [ + "16" + ] + }, + "locus_8": { + "nucleotide": [ + "17" + ], + "protein": [ + "17" + ] + }, + "locus_9": { + "nucleotide": [ + "18" + ], + "protein": [ + "18" + ] + }, + "locus_10": { + "nucleotide": [], + "protein": [] + }, + "locus_11": { + "nucleotide": [ 
+ "1" + ], + "protein": [ + "1" + ] + }, + "locus_12": { + "nucleotide": [ + "2" + ], + "protein": [ + "2" + ] + }, + "locus_13": { + "nucleotide": [ + "3" + ], + "protein": [ + "3" + ] + }, + "locus_14": { + "nucleotide": [ + "4" + ], + "protein": [ + "4" + ] + }, + "locus_15": { + "nucleotide": [ + "5" + ], + "protein": [ + "5" + ] + }, + "locus_16": { + "nucleotide": [ + "6" + ], + "protein": [ + "6" + ] + }, + "locus_17": { + "nucleotide": [ + "7" + ], + "protein": [ + "7" + ] + }, + "locus_18": { + "nucleotide": [ + "8" + ], + "protein": [ + "8" + ] + }, + "locus_19": { + "nucleotide": [ + "9" + ], + "protein": [ + "9" + ] + }, + "locus_20": { + "nucleotide": [ + "11" + ], + "protein": [ + "11" + ] + } + } + }, + "query_hit_columns": [ + "qseqid", + "sseqid", + "qlen", + "slen", + "qstart", + "qend", + "sstart", + "send", + "length", + "mismatch", + "pident", + "qcovhsp", + "qcovs", + "sstrand", + "evalue", + "bitscore" + ] +} \ No newline at end of file diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index 57bc82a..0d955f3 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -44,4 +44,213 @@ command: locidex merge -i locidex/example/merge/merge_in/report.json locidex/example/merge/merge_in/report1.json -o merged_out/ files: - path: merged_out/profile.tsv - md5sum: 4cb7a14f4b4e7d0ef4992d477d773275 \ No newline at end of file + md5sum: 4cb7a14f4b4e7d0ef4992d477d773275 + +- name: Test G1 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G1.fasta -o test_out/extract/G1 -d tests/test_data/outputs/db -n G1 && + locidex search -q test_out/extract/G1/raw.extracted.seqs.fasta -o test_out/search/G1 -d tests/test_data/outputs/db -n G1 && + locidex report -i test_out/search/G1/seq_store.json -o test_out/report/conservative/G1 -n G1 --mode conservative && + locidex report -i test_out/search/G1/seq_store.json -o test_out/report/normal/G1 -n G1 --mode normal + " + files: + - path: 
test_out/extract/G1/raw.extracted.seqs.fasta + - path: test_out/search/G1/seq_store.json + - path: test_out/report/conservative/G1/report.json + - path: test_out/report/normal/G1/report.json + + +- name: Test G2 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G2.fasta -o test_out/extract/G2 -d tests/test_data/outputs/db -n G2 && + locidex search -q test_out/extract/G2/raw.extracted.seqs.fasta -o test_out/search/G2 -d tests/test_data/outputs/db -n G2 && + locidex report -i test_out/search/G2/seq_store.json -o test_out/report/conservative/G2 -n G2 --mode conservative && + locidex report -i test_out/search/G2/seq_store.json -o test_out/report/normal/G2 -n G2 --mode normal + " + files: + - path: test_out/extract/G2/raw.extracted.seqs.fasta + - path: test_out/search/G2/seq_store.json + - path: test_out/report/conservative/G2/report.json + - path: test_out/report/normal/G2/report.json + + +- name: Test G3 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G3.fasta -o test_out/extract/G3 -d tests/test_data/outputs/db -n G3 && + locidex search -q test_out/extract/G3/raw.extracted.seqs.fasta -o test_out/search/G3 -d tests/test_data/outputs/db -n G3 && + locidex report -i test_out/search/G3/seq_store.json -o test_out/report/conservative/G3 -n G3 --mode conservative && + locidex report -i test_out/search/G3/seq_store.json -o test_out/report/normal/G3 -n G3 --mode normal + " + files: + - path: test_out/extract/G3/raw.extracted.seqs.fasta + - path: test_out/search/G3/seq_store.json + - path: test_out/report/conservative/G3/report.json + - path: test_out/report/normal/G3/report.json + + +- name: Test G4 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G4.fasta -o test_out/extract/G4 -d tests/test_data/outputs/db -n G4 && + locidex search -q test_out/extract/G4/raw.extracted.seqs.fasta -o test_out/search/G4 -d tests/test_data/outputs/db -n G4 && + locidex report -i test_out/search/G4/seq_store.json -o 
test_out/report/conservative/G4 -n G4 --mode conservative && + locidex report -i test_out/search/G4/seq_store.json -o test_out/report/normal/G4 -n G4 --mode normal + " + files: + - path: test_out/extract/G4/raw.extracted.seqs.fasta + - path: test_out/search/G4/seq_store.json + - path: test_out/report/conservative/G4/report.json + - path: test_out/report/normal/G4/report.json + + +- name: Test G5 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G5.fasta -o test_out/extract/G5 -d tests/test_data/outputs/db -n G5 && + locidex search -q test_out/extract/G5/raw.extracted.seqs.fasta -o test_out/search/G5 -d tests/test_data/outputs/db -n G5 && + locidex report -i test_out/search/G5/seq_store.json -o test_out/report/conservative/G5 -n G5 --mode conservative && + locidex report -i test_out/search/G5/seq_store.json -o test_out/report/normal/G5 -n G5 --mode normal + " + files: + - path: test_out/extract/G5/raw.extracted.seqs.fasta + - path: test_out/search/G5/seq_store.json + - path: test_out/report/conservative/G5/report.json + - path: test_out/report/normal/G5/report.json + + +- name: Test G6 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G6.fasta -o test_out/extract/G6 -d tests/test_data/outputs/db -n G6 && + locidex search -q test_out/extract/G6/raw.extracted.seqs.fasta -o test_out/search/G6 -d tests/test_data/outputs/db -n G6 && + locidex report -i test_out/search/G6/seq_store.json -o test_out/report/conservative/G6 -n G6 --mode conservative && + locidex report -i test_out/search/G6/seq_store.json -o test_out/report/normal/G6 -n G6 --mode normal + " + files: + - path: test_out/extract/G6/raw.extracted.seqs.fasta + - path: test_out/search/G6/seq_store.json + - path: test_out/report/conservative/G6/report.json + - path: test_out/report/normal/G6/report.json + + +- name: Test G7 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G7.fasta -o test_out/extract/G7 -d tests/test_data/outputs/db -n G7 && + locidex 
search -q test_out/extract/G7/raw.extracted.seqs.fasta -o test_out/search/G7 -d tests/test_data/outputs/db -n G7 && + locidex report -i test_out/search/G7/seq_store.json -o test_out/report/conservative/G7 -n G7 --mode conservative && + locidex report -i test_out/search/G7/seq_store.json -o test_out/report/normal/G7 -n G7 --mode normal + " + files: + - path: test_out/extract/G7/raw.extracted.seqs.fasta + - path: test_out/search/G7/seq_store.json + - path: test_out/report/conservative/G7/report.json + - path: test_out/report/normal/G7/report.json + + +- name: Test G8 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G8.fasta -o test_out/extract/G8 -d tests/test_data/outputs/db -n G8 && + locidex search -q test_out/extract/G8/raw.extracted.seqs.fasta -o test_out/search/G8 -d tests/test_data/outputs/db -n G8 && + locidex report -i test_out/search/G8/seq_store.json -o test_out/report/conservative/G8 -n G8 --mode conservative && + locidex report -i test_out/search/G8/seq_store.json -o test_out/report/normal/G8 -n G8 --mode normal + " + files: + - path: test_out/extract/G8/raw.extracted.seqs.fasta + - path: test_out/search/G8/seq_store.json + - path: test_out/report/conservative/G8/report.json + - path: test_out/report/normal/G8/report.json + + +- name: Test G9 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G9.fasta -o test_out/extract/G9 -d tests/test_data/outputs/db -n G9 && + locidex search -q test_out/extract/G9/raw.extracted.seqs.fasta -o test_out/search/G9 -d tests/test_data/outputs/db -n G9 && + locidex report -i test_out/search/G9/seq_store.json -o test_out/report/conservative/G9 -n G9 --mode conservative && + locidex report -i test_out/search/G9/seq_store.json -o test_out/report/normal/G9 -n G9 --mode normal + " + files: + - path: test_out/extract/G9/raw.extracted.seqs.fasta + - path: test_out/search/G9/seq_store.json + - path: test_out/report/conservative/G9/report.json + - path: test_out/report/normal/G9/report.json + 
+ +- name: Test G10 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G10.fasta -o test_out/extract/G10 -d tests/test_data/outputs/db -n G10 && + locidex search -q test_out/extract/G10/raw.extracted.seqs.fasta -o test_out/search/G10 -d tests/test_data/outputs/db -n G10 && + locidex report -i test_out/search/G10/seq_store.json -o test_out/report/conservative/G10 -n G10 --mode conservative && + locidex report -i test_out/search/G10/seq_store.json -o test_out/report/normal/G10 -n G10 --mode normal + " + files: + - path: test_out/extract/G10/raw.extracted.seqs.fasta + - path: test_out/search/G10/seq_store.json + - path: test_out/report/conservative/G10/report.json + - path: test_out/report/normal/G10/report.json + + +- name: Test G11 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G11.fasta -o test_out/extract/G11 -d tests/test_data/outputs/db -n G11 && + locidex search -q test_out/extract/G11/raw.extracted.seqs.fasta -o test_out/search/G11 -d tests/test_data/outputs/db -n G11 && + locidex report -i test_out/search/G11/seq_store.json -o test_out/report/conservative/G11 -n G11 --mode conservative && + locidex report -i test_out/search/G11/seq_store.json -o test_out/report/normal/G11 -n G11 --mode normal + " + files: + - path: test_out/extract/G11/raw.extracted.seqs.fasta + - path: test_out/search/G11/seq_store.json + - path: test_out/report/conservative/G11/report.json + - path: test_out/report/normal/G11/report.json + + +- name: Test G12 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G12.fasta -o test_out/extract/G12 -d tests/test_data/outputs/db -n G12 && + locidex search -q test_out/extract/G12/raw.extracted.seqs.fasta -o test_out/search/G12 -d tests/test_data/outputs/db -n G12 && + locidex report -i test_out/search/G12/seq_store.json -o test_out/report/conservative/G12 -n G12 --mode conservative && + locidex report -i test_out/search/G12/seq_store.json -o test_out/report/normal/G12 -n G12 --mode normal 
+ " + files: + - path: test_out/extract/G12/raw.extracted.seqs.fasta + - path: test_out/search/G12/seq_store.json + - path: test_out/report/conservative/G12/report.json + - path: test_out/report/normal/G12/report.json + + +- name: Test G13 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G13.fasta -o test_out/extract/G13 -d tests/test_data/outputs/db -n G13 && + locidex search -q test_out/extract/G13/raw.extracted.seqs.fasta -o test_out/search/G13 -d tests/test_data/outputs/db -n G13 && + locidex report -i test_out/search/G13/seq_store.json -o test_out/report/conservative/G13 -n G13 --mode conservative && + locidex report -i test_out/search/G13/seq_store.json -o test_out/report/normal/G13 -n G13 --mode normal + " + files: + - path: test_out/extract/G13/raw.extracted.seqs.fasta + - path: test_out/search/G13/seq_store.json + - path: test_out/report/conservative/G13/report.json + - path: test_out/report/normal/G13/report.json + + +- name: Test G14 + command: > + bash -c " + locidex extract -i tests/test_data/genomes/G14.fasta -o test_out/extract/G14 -d tests/test_data/outputs/db -n G14 && + locidex search -q test_out/extract/G14/raw.extracted.seqs.fasta -o test_out/search/G14 -d tests/test_data/outputs/db -n G14 && + locidex report -i test_out/search/G14/seq_store.json -o test_out/report/conservative/G14 -n G14 --mode conservative && + locidex report -i test_out/search/G14/seq_store.json -o test_out/report/normal/G14 -n G14 --mode normal + " + files: + - path: test_out/extract/G14/raw.extracted.seqs.fasta + - path: test_out/search/G14/seq_store.json + - path: test_out/report/conservative/G14/report.json + - path: test_out/report/normal/G14/report.json \ No newline at end of file From ee369e26b614fdbd7646d0b247612701b2fdc401 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Mon, 17 Jun 2024 17:04:37 -0500 Subject: [PATCH 2/4] updated workflow tests --- tests/test_workflows.yml | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 
deletion(-) diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index 0d955f3..7ed7fc8 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -56,7 +56,9 @@ " files: - path: test_out/extract/G1/raw.extracted.seqs.fasta + md5sum: 263d767c0503521372f8ea3c0689073a - path: test_out/search/G1/seq_store.json + md5sum: 9e33a22323f939371a203c47ebeede0d - path: test_out/report/conservative/G1/report.json - path: test_out/report/normal/G1/report.json @@ -71,7 +73,9 @@ " files: - path: test_out/extract/G2/raw.extracted.seqs.fasta + md5sum: a22c3896a3b2a5d8e0c0793c3848f0b2 - path: test_out/search/G2/seq_store.json + md5sum: 735c90328b01180800056f63e983c5a4 - path: test_out/report/conservative/G2/report.json - path: test_out/report/normal/G2/report.json @@ -86,7 +90,9 @@ " files: - path: test_out/extract/G3/raw.extracted.seqs.fasta + md5sum: 263d767c0503521372f8ea3c0689073a - path: test_out/search/G3/seq_store.json + md5sum: 9a5b510de5cd2f5e454f7bf9dc10accf - path: test_out/report/conservative/G3/report.json - path: test_out/report/normal/G3/report.json @@ -101,7 +107,9 @@ " files: - path: test_out/extract/G4/raw.extracted.seqs.fasta + md5sum: a22c3896a3b2a5d8e0c0793c3848f0b2 - path: test_out/search/G4/seq_store.json + md5sum: 1c150a532cbb42efe4794130b5fb5f5d - path: test_out/report/conservative/G4/report.json - path: test_out/report/normal/G4/report.json @@ -116,7 +124,9 @@ " files: - path: test_out/extract/G5/raw.extracted.seqs.fasta + md5sum: 1fa0a1e6302e05ef9b131d71577e4be8 - path: test_out/search/G5/seq_store.json + md5sum: a0bab6959b2c9294292f4499d8c4563c - path: test_out/report/conservative/G5/report.json - path: test_out/report/normal/G5/report.json @@ -131,7 +141,9 @@ " files: - path: test_out/extract/G6/raw.extracted.seqs.fasta + md5sum: 9c76d604cbf87f0ce649b0b3ebfc4854 - path: test_out/search/G6/seq_store.json + md5sum: def35eb2b6301b8ab6fb31ecaf995d78 - path: test_out/report/conservative/G6/report.json - path: 
test_out/report/normal/G6/report.json @@ -146,7 +158,9 @@ " files: - path: test_out/extract/G7/raw.extracted.seqs.fasta + md5sum: 14814906e4a1dadde2e422fb84aef547 - path: test_out/search/G7/seq_store.json + md5sum: 8e8ed91f9baaffc0ab27a2a26df7a647 - path: test_out/report/conservative/G7/report.json - path: test_out/report/normal/G7/report.json @@ -161,7 +175,9 @@ " files: - path: test_out/extract/G8/raw.extracted.seqs.fasta + md5sum: ca09751ff5ed7b3f5fec1a6d41696a0b - path: test_out/search/G8/seq_store.json + md5sum: 95673d95dcd5bd54afc81f8788e5ef97 - path: test_out/report/conservative/G8/report.json - path: test_out/report/normal/G8/report.json @@ -176,7 +192,9 @@ " files: - path: test_out/extract/G9/raw.extracted.seqs.fasta + md5sum: 3c98833aea5abc427222c9ef1ea0be61 - path: test_out/search/G9/seq_store.json + md5sum: c502a31310f6584a10b4378c5a1c2d82 - path: test_out/report/conservative/G9/report.json - path: test_out/report/normal/G9/report.json @@ -191,7 +209,9 @@ " files: - path: test_out/extract/G10/raw.extracted.seqs.fasta + md5sum: 3b8981e69b40ee7118d8356c89280bcb - path: test_out/search/G10/seq_store.json + md5sum: cd28ea213bd681c7abba59e6cf68bdb8 - path: test_out/report/conservative/G10/report.json - path: test_out/report/normal/G10/report.json @@ -206,7 +226,9 @@ " files: - path: test_out/extract/G11/raw.extracted.seqs.fasta + md5sum: e2bdd2194087059f1b7a7ac664d3fc3f - path: test_out/search/G11/seq_store.json + md5sum: 439b41382a87960e7d123afc060aaae8 - path: test_out/report/conservative/G11/report.json - path: test_out/report/normal/G11/report.json @@ -221,7 +243,9 @@ " files: - path: test_out/extract/G12/raw.extracted.seqs.fasta - - path: test_out/search/G12/seq_store.json + md5sum: 8eb5977e87e795e3dbc50d98af4d2b45 + - path: test_out/search/G12/seq_store.json
+ md5sum: ba3cd7d9e5e243b85cf5cb8347fb1c3f - path: test_out/report/conservative/G12/report.json - path: test_out/report/normal/G12/report.json @@ -236,7 +260,9 @@ " files: - path: test_out/extract/G13/raw.extracted.seqs.fasta + md5sum: f5abee1fa5628d3ccffbe2b5d03e677d - path: test_out/search/G13/seq_store.json + md5sum: c5f24a2fcdc2b118485db8373f239ac5 - path: test_out/report/conservative/G13/report.json - path: test_out/report/normal/G13/report.json @@ -251,6 +277,8 @@ " files: - path: test_out/extract/G14/raw.extracted.seqs.fasta + md5sum: 0c1255c5ed4ee62d4e21eef8e3dafc06 - path: test_out/search/G14/seq_store.json + md5sum: 35aab22a5e554b3db63940b873e267b9 - path: test_out/report/conservative/G14/report.json - path: test_out/report/normal/G14/report.json \ No newline at end of file From 551aaebacece78ce50bb8e2ce202a5475ee03414 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 19 Jun 2024 10:50:30 -0500 Subject: [PATCH 3/4] updated test outputs --- .../extract/G1/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G1/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G1/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G1/blast_db/contigs.fasta.njs | 10 +- .../extract/G10/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G10/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G10/blast_db/contigs.fasta.nin | Bin 152 -> 152 bytes .../extract/G10/blast_db/contigs.fasta.njs | 8 +- .../extract/G11/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G11/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G11/blast_db/contigs.fasta.nin | Bin 152 -> 152 bytes .../extract/G11/blast_db/contigs.fasta.njs | 8 +- .../extract/G12/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G12/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G12/blast_db/contigs.fasta.nin | Bin 152 -> 152 bytes .../extract/G12/blast_db/contigs.fasta.njs | 8 +- 
.../extract/G13/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G13/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G13/blast_db/contigs.fasta.nin | Bin 152 -> 152 bytes .../extract/G13/blast_db/contigs.fasta.njs | 8 +- .../extract/G14/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G14/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G14/blast_db/contigs.fasta.nin | Bin 152 -> 152 bytes .../extract/G14/blast_db/contigs.fasta.njs | 8 +- .../extract/G2/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G2/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G2/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G2/blast_db/contigs.fasta.njs | 10 +- .../extract/G3/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G3/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G3/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G3/blast_db/contigs.fasta.njs | 10 +- .../extract/G4/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G4/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G4/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G4/blast_db/contigs.fasta.njs | 10 +- .../extract/G5/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G5/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G5/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G5/blast_db/contigs.fasta.njs | 10 +- .../extract/G6/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G6/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G6/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G6/blast_db/contigs.fasta.njs | 10 +- .../extract/G7/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G7/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G7/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G7/blast_db/contigs.fasta.njs | 10 +- 
.../extract/G8/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G8/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G8/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G8/blast_db/contigs.fasta.njs | 10 +- .../extract/G9/blast_db/contigs.fasta.ndb | Bin 20480 -> 32768 bytes .../extract/G9/blast_db/contigs.fasta.nhr | Bin 64 -> 40 bytes .../extract/G9/blast_db/contigs.fasta.nin | Bin 144 -> 152 bytes .../extract/G9/blast_db/contigs.fasta.njs | 10 +- .../blast/nucleotide/nucleotide.fasta | 40 -- .../blast/nucleotide/nucleotide.ndb | Bin 20480 -> 0 bytes .../blast/nucleotide/nucleotide.nhr | Bin 1290 -> 0 bytes .../blast/nucleotide/nucleotide.nin | Bin 372 -> 0 bytes .../blast/nucleotide/nucleotide.njs | 22 - .../blast/nucleotide/nucleotide.not | Bin 248 -> 0 bytes .../blast/nucleotide/nucleotide.nsq | Bin 5674 -> 0 bytes .../blast/nucleotide/nucleotide.ntf | Bin 16384 -> 0 bytes .../blast/nucleotide/nucleotide.nto | Bin 84 -> 0 bytes .../locidex_db/blast/protein/protein.fasta | 40 -- .../locidex_db/blast/protein/protein.pdb | Bin 20480 -> 0 bytes .../locidex_db/blast/protein/protein.phr | Bin 1290 -> 0 bytes .../locidex_db/blast/protein/protein.pin | Bin 280 -> 0 bytes .../locidex_db/blast/protein/protein.pjs | 22 - .../locidex_db/blast/protein/protein.pot | Bin 248 -> 0 bytes .../locidex_db/blast/protein/protein.psq | Bin 7568 -> 0 bytes .../locidex_db/blast/protein/protein.ptf | Bin 16384 -> 0 bytes .../locidex_db/blast/protein/protein.pto | Bin 84 -> 0 bytes .../test_data/outputs/locidex_db/config.json | 12 - tests/test_data/outputs/locidex_db/meta.json | 455 ------------------ .../test_data/outputs/locidex_db/results.json | 15 - .../outputs/merge/conservative/profile.tsv | 15 - .../profile_dists/allele_map.json | 78 --- .../profile_dists/query_profile.text | 15 - .../profile_dists/ref_profile.text | 15 - .../conservative/profile_dists/results.text | 15 - .../merge/conservative/profile_dists/run.json | 38 -- 
.../outputs/merge/normal/profile.tsv | 15 - .../normal/profile_dists/allele_map.json | 78 --- .../normal/profile_dists/query_profile.text | 15 - .../normal/profile_dists/ref_profile.text | 15 - .../merge/normal/profile_dists/results.text | 15 - .../merge/normal/profile_dists/run.json | 38 -- .../report/conservative/G1/profile.json | 24 - .../report/conservative/G10/profile.json | 24 - .../report/conservative/G11/profile.json | 24 - .../report/conservative/G12/profile.json | 24 - .../report/conservative/G13/profile.json | 24 - .../report/conservative/G14/profile.json | 24 - .../report/conservative/G2/profile.json | 24 - .../report/conservative/G3/profile.json | 24 - .../report/conservative/G4/profile.json | 24 - .../report/conservative/G5/profile.json | 24 - .../report/conservative/G6/profile.json | 24 - .../report/conservative/G7/profile.json | 24 - .../report/conservative/G8/profile.json | 24 - .../report/conservative/G9/profile.json | 24 - .../outputs/report/normal/G1/profile.json | 24 - .../outputs/report/normal/G10/profile.json | 24 - .../outputs/report/normal/G11/profile.json | 24 - .../outputs/report/normal/G12/profile.json | 24 - .../outputs/report/normal/G13/profile.json | 24 - .../outputs/report/normal/G14/profile.json | 24 - .../outputs/report/normal/G2/profile.json | 24 - .../outputs/report/normal/G3/profile.json | 24 - .../outputs/report/normal/G4/profile.json | 24 - .../outputs/report/normal/G5/profile.json | 24 - .../outputs/report/normal/G6/profile.json | 24 - .../outputs/report/normal/G7/profile.json | 24 - .../outputs/report/normal/G8/profile.json | 24 - .../outputs/report/normal/G9/profile.json | 24 - .../search/G1/blast/nucleotide/hsps.txt | 20 - .../search/G1/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G1/blast/protein/hsps.txt | 20 - .../search/G1/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G1/run.json | 26 +- .../outputs/search/G1/seq_store.json | 160 +++--- .../search/G10/blast/nucleotide/hsps.txt | 19 - 
.../search/G10/blast/nucleotide/queries.fasta | 38 -- .../outputs/search/G10/blast/protein/hsps.txt | 19 - .../search/G10/blast/protein/queries.fasta | 38 -- tests/test_data/outputs/search/G10/run.json | 26 +- .../outputs/search/G10/seq_store.json | 152 +++--- .../search/G11/blast/nucleotide/hsps.txt | 21 - .../search/G11/blast/nucleotide/queries.fasta | 42 -- .../outputs/search/G11/blast/protein/hsps.txt | 21 - .../search/G11/blast/protein/queries.fasta | 42 -- tests/test_data/outputs/search/G11/run.json | 26 +- .../outputs/search/G11/seq_store.json | 168 ++++--- .../search/G12/blast/nucleotide/hsps.txt | 21 - .../search/G12/blast/nucleotide/queries.fasta | 42 -- .../outputs/search/G12/blast/protein/hsps.txt | 21 - .../search/G12/blast/protein/queries.fasta | 42 -- tests/test_data/outputs/search/G12/run.json | 26 +- .../outputs/search/G12/seq_store.json | 176 ++++--- .../search/G13/blast/nucleotide/hsps.txt | 21 - .../search/G13/blast/nucleotide/queries.fasta | 42 -- .../outputs/search/G13/blast/protein/hsps.txt | 21 - .../search/G13/blast/protein/queries.fasta | 42 -- tests/test_data/outputs/search/G13/run.json | 26 +- .../outputs/search/G13/seq_store.json | 168 ++++--- .../search/G14/blast/nucleotide/hsps.txt | 21 - .../search/G14/blast/nucleotide/queries.fasta | 42 -- .../outputs/search/G14/blast/protein/hsps.txt | 21 - .../search/G14/blast/protein/queries.fasta | 42 -- tests/test_data/outputs/search/G14/run.json | 26 +- .../outputs/search/G14/seq_store.json | 176 ++++--- .../search/G2/blast/nucleotide/hsps.txt | 20 - .../search/G2/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G2/blast/protein/hsps.txt | 20 - .../search/G2/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G2/run.json | 26 +- .../outputs/search/G2/seq_store.json | 160 +++--- .../search/G3/blast/nucleotide/hsps.txt | 20 - .../search/G3/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G3/blast/protein/hsps.txt | 20 - .../search/G3/blast/protein/queries.fasta | 
40 -- tests/test_data/outputs/search/G3/run.json | 26 +- .../outputs/search/G3/seq_store.json | 160 +++--- .../search/G4/blast/nucleotide/hsps.txt | 20 - .../search/G4/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G4/blast/protein/hsps.txt | 20 - .../search/G4/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G4/run.json | 26 +- .../outputs/search/G4/seq_store.json | 160 +++--- .../search/G5/blast/nucleotide/hsps.txt | 20 - .../search/G5/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G5/blast/protein/hsps.txt | 20 - .../search/G5/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G5/run.json | 26 +- .../outputs/search/G5/seq_store.json | 160 +++--- .../search/G6/blast/nucleotide/hsps.txt | 20 - .../search/G6/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G6/blast/protein/hsps.txt | 20 - .../search/G6/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G6/run.json | 26 +- .../outputs/search/G6/seq_store.json | 160 +++--- .../search/G7/blast/nucleotide/hsps.txt | 20 - .../search/G7/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G7/blast/protein/hsps.txt | 20 - .../search/G7/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G7/run.json | 26 +- .../outputs/search/G7/seq_store.json | 160 +++--- .../search/G8/blast/nucleotide/hsps.txt | 20 - .../search/G8/blast/nucleotide/queries.fasta | 40 -- .../outputs/search/G8/blast/protein/hsps.txt | 20 - .../search/G8/blast/protein/queries.fasta | 40 -- tests/test_data/outputs/search/G8/run.json | 26 +- .../outputs/search/G8/seq_store.json | 160 +++--- .../search/G9/blast/nucleotide/hsps.txt | 19 - .../search/G9/blast/nucleotide/queries.fasta | 38 -- .../outputs/search/G9/blast/protein/hsps.txt | 19 - .../search/G9/blast/protein/queries.fasta | 38 -- tests/test_data/outputs/search/G9/run.json | 26 +- .../outputs/search/G9/seq_store.json | 152 +++--- tests/test_workflows.yml | 30 +- 202 files changed, 1792 insertions(+), 
4326 deletions(-) delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.fasta delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ndb delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nhr delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nin delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.njs delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.not delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nsq delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ntf delete mode 100755 tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nto delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.fasta delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pdb delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.phr delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pin delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pjs delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pot delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.psq delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.ptf delete mode 100755 tests/test_data/outputs/locidex_db/blast/protein/protein.pto delete mode 100755 tests/test_data/outputs/locidex_db/config.json delete mode 100755 tests/test_data/outputs/locidex_db/meta.json delete mode 100755 tests/test_data/outputs/locidex_db/results.json delete mode 100755 tests/test_data/outputs/merge/conservative/profile.tsv delete mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json delete mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text delete mode 100755 
tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text delete mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/results.text delete mode 100755 tests/test_data/outputs/merge/conservative/profile_dists/run.json delete mode 100755 tests/test_data/outputs/merge/normal/profile.tsv delete mode 100755 tests/test_data/outputs/merge/normal/profile_dists/allele_map.json delete mode 100755 tests/test_data/outputs/merge/normal/profile_dists/query_profile.text delete mode 100755 tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text delete mode 100755 tests/test_data/outputs/merge/normal/profile_dists/results.text delete mode 100755 tests/test_data/outputs/merge/normal/profile_dists/run.json delete mode 100755 tests/test_data/outputs/report/conservative/G1/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G10/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G11/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G12/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G13/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G14/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G2/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G3/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G4/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G5/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G6/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G7/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G8/profile.json delete mode 100755 tests/test_data/outputs/report/conservative/G9/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G1/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G10/profile.json 
delete mode 100755 tests/test_data/outputs/report/normal/G11/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G12/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G13/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G14/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G2/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G3/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G4/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G5/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G6/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G7/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G8/profile.json delete mode 100755 tests/test_data/outputs/report/normal/G9/profile.json delete mode 100755 tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G1/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G1/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G10/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G10/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G11/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G11/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt delete mode 100755 
tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G12/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G12/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G13/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G13/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G14/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G14/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G2/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G2/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G3/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G3/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G4/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G4/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta 
delete mode 100755 tests/test_data/outputs/search/G5/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G5/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G6/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G6/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G7/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G7/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G8/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G8/blast/protein/queries.fasta delete mode 100755 tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt delete mode 100755 tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta delete mode 100755 tests/test_data/outputs/search/G9/blast/protein/hsps.txt delete mode 100755 tests/test_data/outputs/search/G9/blast/protein/queries.fasta diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 
lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`46bcSN delta 77 zcmbQiID=7~fq{V)h+#l0CqFqeCADIrgtdgMS81Mtp@EKqk%5tkf`Xx;m7$@6qc6k6 TELlMZpaw<;2Jw?XoC~4>0?Q57 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs index fff3efe..e92a3f5 100755 --- a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs +++ b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.njs @@ -3,17 +3,19 @@ "dbname": "contigs.fasta", "dbtype": "Nucleotide", "db-version": 5, - "description": "locidex/extract/G13/blast_db/contigs.fasta", + "description": "test_dev/extract/G13/blast_db/contigs.fasta", "number-of-letters": 23981, "number-of-sequences": 1, - "last-updated": "2024-06-10T11:11:00", + "last-updated": "2024-06-19T10:43:00", "number-of-volumes": 1, - "bytes-total": 43197, + "bytes-total": 55515, "bytes-to-cache": 6241, "files": [ "contigs.fasta.ndb", "contigs.fasta.nhr", "contigs.fasta.nin", + "contigs.fasta.nog", + "contigs.fasta.nos", "contigs.fasta.not", "contigs.fasta.nsq", "contigs.fasta.ntf", diff --git a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`46bcSN delta 77 
zcmbQiID=7~fq{V)h+#l0CqFqeCADIrgtdgMS81Mtp@EKqk%5tkf`Xx;m7$@6qc6k6 TELlMZpaw<;2Jw?XoC~4>0?Q57 diff --git a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.njs index b008c95..e69309d 100755 --- a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.njs +++ b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.njs @@ -3,17 +3,19 @@ "dbname": "contigs.fasta", "dbtype": "Nucleotide", "db-version": 5, - "description": "locidex/extract/G14/blast_db/contigs.fasta", + "description": "test_dev/extract/G14/blast_db/contigs.fasta", "number-of-letters": 23981, "number-of-sequences": 1, - "last-updated": "2024-06-10T11:11:00", + "last-updated": "2024-06-19T10:43:00", "number-of-volumes": 1, - "bytes-total": 43197, + "bytes-total": 55515, "bytes-to-cache": 6241, "files": [ "contigs.fasta.ndb", "contigs.fasta.nhr", "contigs.fasta.nin", + "contigs.fasta.nog", + "contigs.fasta.nos", "contigs.fasta.not", "contigs.fasta.nsq", "contigs.fasta.ntf", diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs index 5f1ea63..887aedc 100755 --- a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs 
+++ b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.njs @@ -3,17 +3,19 @@ "dbname": "contigs.fasta", "dbtype": "Nucleotide", "db-version": 5, - "description": "locidex/extract/G4/blast_db/contigs.fasta", + "description": "test_dev/extract/G4/blast_db/contigs.fasta", "number-of-letters": 22872, "number-of-sequences": 1, - "last-updated": "2024-06-10T11:09:00", + "last-updated": "2024-06-19T10:42:00", "number-of-volumes": 1, - "bytes-total": 42908, - "bytes-to-cache": 5952, + "bytes-total": 55234, + "bytes-to-cache": 5960, "files": [ "contigs.fasta.ndb", "contigs.fasta.nhr", "contigs.fasta.nin", + "contigs.fasta.nog", + "contigs.fasta.nos", "contigs.fasta.not", "contigs.fasta.nsq", "contigs.fasta.ntf", diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs index 45b0214..47de16b 100755 --- a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs +++ b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.njs @@ -3,17 +3,19 @@ "dbname": "contigs.fasta", "dbtype": "Nucleotide", "db-version": 5, - "description": "locidex/extract/G5/blast_db/contigs.fasta", + "description": "test_dev/extract/G5/blast_db/contigs.fasta", "number-of-letters": 22872, "number-of-sequences": 1, - "last-updated": "2024-06-10T11:09:00", + "last-updated": "2024-06-19T10:42:00", "number-of-volumes": 1, - "bytes-total": 42908, - "bytes-to-cache": 5952, + "bytes-total": 55234, + "bytes-to-cache": 5960, "files": [ "contigs.fasta.ndb", 
"contigs.fasta.nhr", "contigs.fasta.nin", + "contigs.fasta.nog", + "contigs.fasta.nos", "contigs.fasta.not", "contigs.fasta.nsq", "contigs.fasta.ntf", diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs index 69df09f..d213806 100755 --- a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs +++ b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.njs @@ -3,17 +3,19 @@ "dbname": "contigs.fasta", "dbtype": "Nucleotide", "db-version": 5, - "description": "locidex/extract/G6/blast_db/contigs.fasta", + "description": "test_dev/extract/G6/blast_db/contigs.fasta", "number-of-letters": 22872, "number-of-sequences": 1, - "last-updated": "2024-06-10T11:09:00", + "last-updated": "2024-06-19T10:42:00", "number-of-volumes": 1, - "bytes-total": 42908, - "bytes-to-cache": 5952, + "bytes-total": 55234, + "bytes-to-cache": 5960, "files": [ "contigs.fasta.ndb", "contigs.fasta.nhr", "contigs.fasta.nin", + "contigs.fasta.nog", + "contigs.fasta.nos", "contigs.fasta.not", "contigs.fasta.nsq", "contigs.fasta.ntf", diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 
lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs index 264d3e7..5dbb0b5 100755 --- a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs +++ b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.njs @@ -3,17 +3,19 @@ "dbname": "contigs.fasta", "dbtype": "Nucleotide", "db-version": 5, - "description": "locidex/extract/G7/blast_db/contigs.fasta", + "description": "test_dev/extract/G7/blast_db/contigs.fasta", "number-of-letters": 22872, "number-of-sequences": 1, - "last-updated": "2024-06-10T11:09:00", + "last-updated": "2024-06-19T10:42:00", "number-of-volumes": 1, - "bytes-total": 42908, - "bytes-to-cache": 5952, + "bytes-total": 55234, + "bytes-to-cache": 5960, "files": [ "contigs.fasta.ndb", "contigs.fasta.nhr", "contigs.fasta.nin", + "contigs.fasta.nog", + "contigs.fasta.nos", "contigs.fasta.not", "contigs.fasta.nsq", "contigs.fasta.ntf", diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ndb b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.ndb index 5fd6f7085890c929e9fa647574a304e6e7c1d317..4888bd707f72a1c5576d78df8129b31d4e04bfa0 100755 GIT binary patch delta 204 zcmZozz}V2hG(m52oPg9M0S*p!1_+oOA)p}14CSywX_zF_#)Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`4Id~G2la2B-Q$X58CdUd$15IXRn_QvaA;<L-&4LCG`Q=%_ j!i)?CU>ar*M9~BO2b%>I&hsk>fF(JAIw61&O2ha7vRoOY delta 29 lcmZo@U}{*v$jC66Q6P5Wbfw7w0vwEtn-vYp`40 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->2 
-atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->3 -ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->4 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->5 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->6 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->7 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->8 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag ->9 
-atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->10 -gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->11 
-atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->12 -atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgt
gccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->13 -ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->14 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->15 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->16 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->17 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->18 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->19 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaa
agcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgc
tgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ndb b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.ndb deleted file mode 100755 index a9b411b3621a29a79d75d1c0da6f615196410c93..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI%K~94}6adgKnn>JWOz&Xac?7Rw6q6Y_2JE}A?R z%pYd>{61E{M5H5cgOBCs;v`+#jMwis?esDtziXqtr?39lyJ@tT_U}A%v@Rq-fB*pk z1PBlyK!5-N0t5~h82ax2=>Lc8L)6ur^`)orte^kfcla-qfdByl1PBlyK!5-N0t5&U zNd0$27L%A4%VJ(oAwYlt0RjXF5FkK+009C7{#M|&NteZWyw$Vn{<+*F&tv!qK1AJP z)BLoXMvU&CR!{YB?@yoGW1Fa{;wo0fO81{=jRXh~AV7cs0RjXF5FkL{2n8`+M{ky7LD7ATZ(!@Cx4FfuDdc!3W?g@FDm;_$l}r zd<6ageg^&sKK}2Op#dfopasq;Kod+UKpV^`KqFjGfL6Gq0L?I`0PS!^0UF|(0<^@< GkH8C-tr(mD diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nin b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nin deleted file mode 100755 index 10e6f3db680b3ce612ca913c6c072fc64983b718..0000000000000000000000000000000000000000 GIT binary 
patch literal 0 HcmV?d00001 literal 372 zcmb`C%L)Nu7=~YGjgV+IO8BFzm!Uhb-5Ih@F&bvCjaS3G zM80)2-@heuy^lzxP*O8CyD}+XB0l#oJb!sUA5YjZv*(e^Q-CbXIH7APY}09RzU`{l z<&}dsG+lKzlYPWM&y4B2FXB0LKEJQ|1(W}4Aaq}3LmU+NR^B|84%m$R^ehzNYTM94 zG{XCN+RD;!;LhCRp|tG}v_)(3ynrGNckk4A$poVw=C_y<1nmYhNie(1AA0F%_oeb6Y`l1D7@4{cnV2kZn#&eJfKB&|a~8kcvs&jP0r| z=}p!#h+f~gLP1ZSyIgg#)B(ZJG|d(Tvvx78wMmO6?+jK+Kcq!nU>SbYn2NvUx`+Ml zDG_mJD1Rzex2k?CwKfCc7N`|rwXXo1m>WrFw8Qu78xNsZ12H< z?=@Uu=q&JA)Z6vs@v8-Pg_k=nH|Lt%w=qRsVrA--!@X5ru5E!QN1w<)XRM|fZ1Wi2 znGHJh<#}tNd@)b+HfLatdK%uTws%b5VpG3te>AhCsXvvuCD_EMTXT6&Dqdd~t>@^( zURad}Ya)+FJHXu1yQ9o{lwiXXSF01wPuyO;$#;K}vUG*=W&$=V#Ur}pv9!&$G7heI z=vzt}%Pn=V|@v?l$-*M18t9 zq>DYU1~WiWn}3wye|T$1p&xS-!CAXg{-&j`y{NEq=k^Xv1l=qYL3r3s2EbO;{(1Jj z&)iV_j-2)I_jrL6vl^rEP@g$lb~L*hGCSlE(JjyikFCp#g~nH2!Rv>20m@wHNv$(~ z?1ameUA7GYRR_s=GVd^ue-goo1R?Z9lqvhFOIagHB&mRru_!WsBRlVXz_!htw>@!J z$P^b5lbU;jJV*?C$3M+ux+S>rL&%}EW`}A6Y*2Y=u3LWA-In-u!Xx>3Sm$ZE&DuPa z8o}I*6+6goC2TYNJ6+S*o9{9o7~DR-N;qTplYvd``l281w@uQDyb_lB!&04Rv9jPd zQ?7IA>wlS-93NJHS@v@6EySgg72BSx`@FmFpq)*yA=+9#g<1KTK1ML*UUAj2A(QT& zU(O^AOr_+vv9B42rA;~Uqv<$rr;!A-*83$S?riY=Y75B2iNARF{dy+xOI^tS!QAq^ zlIscVdrtxUfHwuwqzG@V%I@367sO3ybE(2#zg(h--cly^ETm+)6P&LayO;1xyb=cI z=!VgT3FOhNHTq~nkKjQ%lP_z2{iXUUNk9}&pmoHBDz z%`1(Z>8osb1E2bUcQN@ytkcZHHD6&=^TM!k-OVq!X-5&&hrRmCl(-8rEi-v#Twl%@ z)aq(0eUpHo=e44>0b_FTvAPjt`*q>n&fB>v&1TbD*qM6_1SH-X0$=rQncDj)FShY2 zwQiZF>40T-jra~>Qs)MXOi}_=BjqJHCyTV?I2eb2Xy+cEZ}=4UyBA!fh_cpm<7K9D zZe+=^X67Mne_sXkel(5lccYA*FoOwv;N~$Q`=0mjt$#>0i~E5@uvt5u#7D9=pqlz( zycUZilL<)V1-^0h*t1sI60=eSQ3UOXOQnX{McO8Ul|Eg$Kni8cMHkJv(WeKZdtc3Ya8xKx@z^nq5yKAKu>NZ zj$+A7q1^N{Jg@gl@$B2tLU6`pQ#_t{(-Jnckgv?^heA4di=>^KAKFO9%H7%%i&0mS z&Yj&9-|MGThzkXx8O^ctxfd?GsUCO)r+%KGZni~1RFE$n%==$hV;FWJ;*qMX;Y#Z$ zv#IF$c#%6{viat<${`L@h#JP&`phavW{o&T+0h}k2DFRJr$Ns5!6mwVE#c$Nu(RpL zHqW&nZ>`cce^;36R*N}r$JSWe`dP7?Qa&F9i-;Jjv2omL&RRcRuza>!dtZrQjvS$$ zt=%Kfw2Sp0{URg@7r*oM&P^CVeDFuM)A6=hX(ugJ?o2Q4-R7L9A!(*Dtk)m z@~1^!y;eo}BC$cGIXjpwm}-VuW_{oUBq7qPtn`dy1p31C{!;&4lBKsP8bp^3QjI2n 
z0sBBN$Yylxat1fZlT^7qx>KNUva0BT*P~I54Vlsn_c0`AJdUrn#q5!xcUE~7l<~w9 zJWj*5?^WcuBS$cAOV<`*o~Xc_E&rzwlG5I9s#`Zi?6q zZK1QVb(&@O=~24!8_HXdDC#p#`CPFQCrrVgZzbj-W;Pf^QDxJg`|1`kpT$RDRtklP z{a1oLu{uVcBS8u&likO)Q7f+wH@{A8B_cdk=O8R~V0^?+l>$5*IevYlr4j<5#MCB{ zhV!|6i@*;KHQCclJZwFja)-P_X0o2dd_;BU+d_K=3tWW()MEHdEHHyb(7JsVzg_C^ z3LrtjHb=8ffr9)y=E)}*d?M6WYi@+#l?6!v+fiJ~&1b-M)q=L%;1Y>PxhEXEed6?3 z6brq(-?XNpztoR$JJWfHsYj|LR;$}bFh5Sw7t0$3-P8x*QCi`*Se)WpjcQY)ap7_c zNyo$+rG9=_Ehelg32aR?-w0% zT8qKLj4x6C*vMl67bYt!9)G6X&f1FW;~TMONGkF36Y0Ei*@Cj4bCi1f>f*Nx>0;Xk zdF-mC_ft!)G*<_7asDQQB4ON{*qnuo@d+&xcit!+#s5Ie@g&s4%`A-*-LC@f&L<+{$R6Xpu6K|taaeV_<3)$6p6l}*aCSSv8LrUd28)FnmXnmK?s>1 zT6$&8F0egv8l%w>)wVKLgCb>}HN#%FZLO$~kk5O>ZI|M?POF&fO~ti9q`Mt`l4l$g zYOHy{0p{w9)vE($`ELjE&BZaBlGq(xQ3v2}7?*@%!e z(q-?8<-8M64SHFb%^&$1M8A1~(l$px14)gDgX>fgIo$YH$!_FdBu{D;FEy8!uueX5 zZhk&jwM5aI^;O7NlKZ@(>ofP>C&Cd+;f`rNl-VM$4CLs6f}%N!sbF<_ zN|VaF=^Gu8;(N;pnbHE1x1Qb*-Yl7VM_l3ZRa_sI5k@?E@(T0GRw2GO2goI$=v7H- zYSz~w;MG{=!FKXwSF^lCOgKLJxnN%tFV*#tX(h+}QTNk>P3bF7d6b+5>?t~_rkPyI z{I?HW5CD*1k?MH_WWoxM?Wxr{J+>gP?!7ua)2I2cNW;QeW>_RN zfxiF!%KE5>oiD~2*Zk_iwdY@IwL2>nsCzM}bQ#p5%;V){#0#^V>Qw`}k?D@(OiB)9 z$ticJ8&n0cteaS4IV}n+9x4j1dP$l~Oe9YhUaA_cZ}mM#gXb#GK6 zVlF`u$k6FL`8=wO$l>o8%NomoX3me2zdcz2f8Jl&?ueazl?VwYA;n_aecYIR>^v>? 
zNnvWbo9~uqO#W9wA>S&v%Ooha)+3^+5=cN2!_50@Uhf!$6hgSciR@TXRI)WdCNp4@ za~jM5>pVTCAE_8jGz5c>oN{-$o%MOJV)qw_{?F@5hfAX;sN;m zo_Cq&3Y8SHzq;hY&7%AqB32zcdcO;LHbG?@4Ji6 zXF-N%;IHh9Ac$yO$dDX+raQ?=~!Tv?zx#{h<}|rsARE2E1~ZvB|j?DUn?OM zA^F#rY1d*^!`6+%npyawf8?B+mH;*fJOZ8~PCiC6#EgheSapnSI{Vm=pnq-`&^2v!pFhs&6WR$y41{o8a1s8Xo?Yce% z*ywgLW|%W*OyL60b;d7vs}Bk5dqdp8=95`T(QH}9mF8@$$V-Lg(rl~2opnG~pu-vf8hE{MGW=$}3NB@mwA8vqcG71| z?f2x_+D4izJhB^@>dB9$>R+3J5&96#A28U2idD>oZ1Ca*){(da3|cUtPSCcHHShUG zRu_~}pouk1;NAQ29p_#nMWfp%leM61)uM@HGh00h)JqdBkqo&9%JCs?+E^fTY^51) zvsIkjp7eto#bUni8h;S_K=x}7v#%&~S;*a|(157i@$~o3%aeu<2mL8fKx`(PcsF<& zc{90NKrLEO3N;UMcT$S8pWea$R(3V6%WzBy;4j+Pbga))L-}v|GAdcL!As^I4E|$w zJ)99S9Igh_R4}ah_U4&B$p{t?i%T-uFgglm4U;4Ae<+XP^1q{Ip~1M$vM&~*8;iWm zGU$>i8-Q)WDGLRzwOViJlA?t*cT&`^RD-j*l{()T4?r^FgMeOG@w`LqC{MF|JI0M- z>klW>%QOWRwfeU}ELN$D3L$j{^u7+jMltP`H4kcbOX4$abCe8%Mc-Y|2gXY{Z^1xp z-~F{B+e*cTw^eS>+Q)#0f&v0*zjc{VQk~#o#vy5Y_1!Uks8Tsis4MjKKV*<M1-g zehQqAys!ub1YZ}JZPo~4Z1m|8Tz>QF`D8~PeHkjfJ3n@#mTBR)?OiO1$WC^gxq=J4 z*Dqpx8paS!8ciX*5ZD>**+D}93cC%yE$cb7^Aj!Eg;%O^4kG619m;hU3h_~e1sc@` zf)GH-E45+<_gP_F#PB!y`E=tF9Z)TR;V8aL@bSA4s)M22kc4hX~XwjtV8 zzL_^biVHUzFLjem4Dfq**#_=er373f0V*WW$kikjLll5DSxWkAMDY6o28H807xW)p z_h$KCLCnoCDf72<1bKuF&s97wlX90C{GMW2=S08g&+X^-#*w`dMe5=^lQ!i`jezn^ zUx1}=;ruxsaVgdoBLJ8I#Egl*Aq+(VZHJP>dzo%BjFR++9eWG@F^1yKXezT2B%KYK ziP|T(vDolmG4t-PGI1XvZUbM^g(yCcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y diff --git a/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nto b/tests/test_data/outputs/locidex_db/blast/nucleotide/nucleotide.nto deleted file mode 100755 index 91d3a927c3e718edb4b7b0774fed440a3c193069..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 84 
vcmXBF2?~H9002QdP16qZ|8H%;g<*RbO*XTzvau66NMue5m5ZB)m&V5r8>|4+ diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.fasta b/tests/test_data/outputs/locidex_db/blast/protein/protein.fasta deleted file mode 100755 index 38c6a95..0000000 --- a/tests/test_data/outputs/locidex_db/blast/protein/protein.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -mydppflealmitasffaifiiivvsvlllegd* ->1 -mklflttaaltatltsgmgfasdtvipwatnsggtesthiaamgedvnaqhqhiihthegvcaansgtiqadeaaltsnkppvqvqpellphqg* ->2 -maknrsrrlrkkmhidefqelgfsvawrfpegtseeqidktvddfindviepnklafdgsgylaweglicmqeigkcteehqaivrkwlearnleevrtselfdvwwd* ->3 -lklipfyllalfsassgateinackdligtwktpadnppytvtilppveacgekcvklnveyeldvthrnalycherqegvkgqgpmviafegaygghaigtynrqlqllragvipknkkwkwitkmenywfrrvkah* ->4 -mkkfdschpvflligcaqvplrssvskpvqqpsaqkeqlanangidecqslpyvpsdlaknkslsnqiadntasknsaissrifcekykqtkeqaltffqelpqymrskeveeqhmtefkkvllepgsknlsiyqtllaaherlqal* ->5 -mrikpddnwrwyydeehdrmmldlangmlfrsrfsrkmltpdafcptgfcvddaalyfsfeekcrdfeltkeqraelvlnalvairylkpqmpkswhfvahgemwtpgtgdaasvwlsdtaeqvnllvvepgenaalcllaqpgvviagrtmqlgdaikimndrlkpqvhchsfsleqav* ->6 -vlaftlrfiknkryfailagalviiagldsqhacsgnglpqingkalaalakqhpvvvlfrhaercdrsdntclpdstgitvngaqdaralgkafsadiqnynlyssntvraiqsatwfsagrsrravkkmmdcgsgiyssintllknsqiknivilthnhcltyivknkrgvkfdpeylnalvmyaengkllldgefvpg* ->7 -mgsnyivieglegagkttardvvvetleqlgirnmiftrepggtqlaeklrslvldirsvgdevitdkaevlmfyaarvqlvetvikpalaqgvwvigdrhdlstqayqgggrgidqtmlatlrdavlgdfrpdltlyldvtpevglkrarargdldrieqesfdffnrtrarylelaaqdsrirtidatqpldavmrdiratvtkwvqeqaa* ->8 -mkhikksvlvvlltshvahasivvggtrlvfdgnndessinvenkdskanlvqswlsvadpqvtnkqvfiitpplfrldagqknsirvirsgaplpadresmywlnikgipsiddnasanrveisintqikliyrppaltkstpdsqsqqlkwqtagdvitvnnptpyymnfasvtlnshevksatfvppkssasfklsstaaphgtvtwrlisdygmslephsgsf* ->9 
-mrillsnddgvhapgiqtlakalrefadvqvvapdrnrsgasnsltlesslrtftfdngdiavqmgtptdcvylgvnalmrprpdivvsginagpnlgddviysgtvaaamegrhlgfpalavslngyqhydtaaavtcallrglsreplrtgrilnvnvpdlplaqvkgirvtrcgsrhpadkvipqedprgntlywigppgdkydagpdtdfaavdegyvsvtplhvdltahsahdvvsdwldsvgvgtqw* ->10 -vnlvktpglhaagkginvanvlkdlgidvtvggflgkdnqdgfqqlfselgianrfqvvqgrtrinvkltekdgevtnfnfpgfdvtpadwerfvndflswlgqfdmvcvsgslpagvspeaftdwmtrlrsqcpciivdtsrvelvagliaapwlvkpnrreleiwagrklpemkdvidaapalreqgiahvvislgaegalrvnasgewiakppavdventvgaggsmvggwiygllmrestehtlrlatavaalavsqsnvgitdrpqlaammarvdlqpfn* ->11 -mnknkystpllmlatilagmlspmqsavngqlghwlqdgnacavisfasglvvmffiiiarketrqqfasiptlikkrkiplwnwfaglcgamvvfsegasasalgvatfqtalisalllsgllcdrfgigveekkyftpwritgalfaviatifvvspqwhstsfillailpflagllagwqpagnakvaeatgsmlvsitwnfivgfcvlgaalairialghvtiqlpdtwwmylggplgllsiglmailvrglgllmlgvastagqllgsvlidelipslgntvylvtiigtlfalvgaivttipeyraskmaqkmevse* ->12 -mkgrwakyvatgvmlamlaacsskptdrgqqykdgkftqpfslvnqpdavgapinagdfaeqvdqirsasprlytnqsnvynavqnwlrsggdtrtmrqfgidawqmegtdnygnvqftgyytpvvqarhtrqgafqypiysmppkrgrlpsraqiyagalsdkyilawsnslmdnfimdvqgsgyidfgdgsplnffsyagkngwpyrsigkvlidrgevkkedmsmqairewgekhseaevrelleqnpsfvffkpqsfapvkgasavpligrasvasdrsiippgttllaevplldnngkfsgqyelrlmvaldvggaikgqhfdiyqgigpdaghragwynhygrvwvlksapgagnvfsg* ->13 -lfdrydageqavlvhiyfsqdkdmedlqefeslvyyagveamqvitgsrkaphpkyfvgegkaveiaeavkatgaavvlfdhalspaqernlerlcecrvidrtglildifaqrarthegklqvelaqlrhlatrlvrgwthlerrkggigsrgpgetqleadrrllrnrivqiqsrlekvekqreqgrqsrikadvptvslvgytnagkstlfnqitearvyaadqlfatldptlrridvadvgetvladtvgfirhlpydlvaafkatlqetrqatqllhvvdaadvrvqenieavntvleeidahefptlmvmnkidmlddfepridrdeenkpirvwlsaqsgvgipqlfqalterlsgevaqhtlrllpqegrlrsrfyqlqaiekewmeedgsvspqvrmpivdwrrfckqepalieyvi* ->14 
-vaqrilvlgasgyigqhlvfalsqqghqvraaarrverlekhrlanvschkvdlhwpenlpallrdidtvyylvhgmgeggdfiaherqaalnvrdalrqtpvkqliflsslqapaheqsdhlrarqltadtlrdagvpvtelragiivgagsaafevmrdmvynlpiltpprwvrsrttpialenllyylvgllehpahehrileaagpqvlsyqqqferfmavsgkrrplipvpfptrwisvwflnvitsvppttakaliqglrhdlladdaalkklipqtlitfddavrrtlkeeeklvnssdwgydalafarwrpeygyfpkqagftaqtpaslsalwqvvnrlggkegyffgnilwqtraamdrlvghklakgrpshtllkpgdtvdswkviivepekqltllfgmkapglgrlsftlhdkgryreidvrawwhphgmpgliywllmipahlfifrgmarriarlaeqitek* ->15 -mnkfarhflplslrvrfllatagvvlvlslaygivalvgysvsfdkttfrllrgesnlfytlakwennkicvelpenldmqsptmtviydetgkllwtqrnipwlikstqpeglktngfheietnvdatstllsedhsaqeklkevreddddaemthsvavniypattrmpqltivvvdtipielkgsymvwswfvyvlaanlllvipllwiaawwslrpidalsrevreledhhremlnpettrklislvrnlnqllkseherynkyrtsltdlthslktplavlqstlrslrnekmsvskaepvmleqisrisqqigyylhrasmrgsgvllsrelhpvaplldnlisalnkvylrkgvnismdispeisfvgeqndfvevmgnvldnackyclefveisarqtddhlhifveddgpgiahskrslvfdrgqradtlrpgqgvglavareiteqyagqiiasdsllggarmevvsgrqhptqkee* ->16 -mtiqkrlleaveqkllrpidaqfaltvagnddpavtlaaallshdageghvclplsrltlteeahpllvawisetatpidwkkrllasaavscgdspaplilcgdrlylnrmwcnertvarffnevnqaiavdedqlsrildalfpptdevnwqkvaaavaltrrisvisggpgtgktttvakllaaliqmadgercrirlaaptgkaaarlteslgaalrqlpltdaqkkripedastlhrllgaqpgsqrlrhhagnplhldvlvvdeasmidlpmmsrlidalpphgrviflgdrdqlasveagavlgdicayvnagftaerarqlsrltgsaipagagtqaaslrdslcllqksyrfgsdsgigklaaaincgdrsaiqavfqqgfsdiekrtlqssddyagmldealagygrylrllhekaapeailqafneyqllcalregpfgvrglndrieqamvqqrkiqrhphsrwyegrpvmiarndsalglfngdigialdrgqglrvwfvmpdgtiksvqpsrlpehdttwamtvhksqgsefdhaalilpsqrspvvtrelvytavtrarrrlslyaderilagaivtrterrsglatlfdevsrig* ->17 
-mqevamssqeaskmlrtyniawwgnnyydvnelghisvcpdpdvpearvdlaklvkareaqgqrlpalfcfpqilqhrlrsinaafkraresygyngdyflvypikvnqhrrvieslihsgeplgleagskaelmavlahagmtrsvivcngykdreyirlaligekmghkvylviekmseiaivleeaerlnvvprlgvrarlasqgsgkwqssggekskfglaatqvlqlvetlrdagrldslqllhfhlgsqmanirdiatgvresarfyvelhklgvniqcfdvggglgvdyegtrsqsdcsvnyglneyanniiwaigdaceehglphptvitesgravtahhtvlvsniigverneytdptapaedapralqnlwetwqemhkpgtrrslrewlhdsqmdlhdihigyssgafslqerawaeqlylsmchevqkqldpqnrahrpiidelqermadkmyvnfslfqsmpdawgidqlfpvlplegldqvperravllditcdsdgaidhyidgdgiattmpmpeydpenppmlgffmvgayqeilgnmhnlfgdteavdvfvfpdgsvevelsdegdtvadmlqyvqldpktllthfrdqvkqtdlddalqqqfleefeaglygytylede* ->18 -mnslpqrstdfelttsqdgfalswqqrlilrhsaenpclwigagvadidmfrgnfsikdklnekialteatvselpdgwlvqfsrgatisatlrisadeagrltldlqnddlhhnriwlrlaanpddhiygcgeqfsyfdlrgkpfplwtseqgvgrnktsyvtwqadckenaggdyyltffpqptfvstqkyychvdnscymnfdfsapeyhelalwedkttlrfecadtyiallekltallgrqpelpdwvydgvtlgiqggtevcqqkldnmrnagvkvygiwaqdwsgirmtsfgkrvmwnwkwnsdnypqldsrikqwkeegvqflsyinpyvasdkdlcaeaarhgylakdatggdylvefgefyggvvdltnpeaydwfkdvikknmialgcsgwmadfgeylptdtylhngvsaelmhnawpalwakcnyealqktgklgeilffmragytgsqkystmmwagdqnvdwslddglasvvpaalslamtghglhhsdiggyttlfdmkrskelllrwcdfsaftpmmrthegnrpsndwqfdgdaetiahfarmttvfttlkpylkqavaqnaatglpvmrplflhyendaatytlkyqyllgqdllvapvheqgrcdwtlylpedhwvniwtgeahhggeisvdapigkppvfyraksewallfaslrni* ->19 
-mkhlrvvacmimlalagcdnndktapttkseapavaqpspaqdpsqlqklaqqsqgkaltlldaseaqldgaatlvltfsipldpeqdfsrvvhvvdkksgsvdgawelapnlkelrlrhlepervlvvtvdpavkalnnatfgksyektittrdvqpsvgfasrgsllpgkiaeglpvmalnvnhvdvnffrvkpgslasfvsqweyrsslsnwesdnllkmadlvytgrfdlnparntreklllplsdikplqqagvyvavmnqaghynysnaatlftlsdigvsahryhsrldiftqslengaaqsgieivllndkgqtlaqatsdaqghvqleadkaaalllarkegqttlldltlpaldlsefnvagapgyskqffmfgprdlyrpgetvilngllrdsdgktlpdqpvklevvkpdgqvmrtvvsqpenglyrlnypldinaptglwhvrantgdnllrswdfhvedfmpermalnltaqktplapadevkfsvvgyylygapangntlqgqlflrplrdavaalpgfqfgniaeenlsrsldevqltldkggrgevsaasqwqeahsplqvilqasllesggrpvtrrveqaiwpadtlpgirpqfaakavydyrtdttvnqpivdedsnaafdivyanaqgekkavsglqvrlirerrdyywnwsesegwqsqfdqkdlvegeqtldlnadetgkvsfpvewgayrlevkapnetvssvrfwagyswqdnsdgsgaarpdrvtlkldkanyrpgdtmklhiaapvagkgyamvessdgplwwqaidvpaqgleltipvdktwnrhdlylstlvvrpgdksrsatpkravgllhlplgddnrrldlalespakmrpnqpltvrvkasvkhgempkqinvlvsavdsgvlnitdyatpdpwqaffgqkrygadiydiygqviegqgrlaalrfggdgddltrggkppvnhaniiaqqaqpitlneqgegvvtlpigdfngelrvmaqawtaddfgrgeskvvvaapviaelnmprflaggdvsrlvldvtnltdrpqtlnialaasgllellsqqpqpvnlapgvrttlfvpvralegfgegeiqatisglnlpgetlgaqhkqwqigvrpawpaqtvnsgialapgeswhvpeqhlanvspatlqgqlllsgkpplnlaryirelkaypygcleqttsglfpalytnaaqlqslgitgdsdekrraavdigisrilqmqrdnggfalwdengaeepwltayamdflirageqgysvppeainrgnerllrylqdpgtmlirysdntqastfaaqayaalvlarqqkaplgalreiwerrsqaasglplmqlgialntmgdarrgeeaitlalntprqderqwiadygsslrdnalmlslleennlrpdaqnallsslseqafgqrwlstqennalflaahsrqasagawqaqtsleaqplsgdkaltrnldadqlaalevtntgsqplwlrldssgypssapepasnvlqierqilgtdgqrkslsslrsgelvlvwltvvadrnvpdalvvdllpaglelenqnladssaslpesgsevqnllnqmqqadiqymefrddrfvaavvvnegqpvtlvylaravtpgtyqlaqpqvesmyapqwratgasegllivtp* diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pdb b/tests/test_data/outputs/locidex_db/blast/protein/protein.pdb deleted file mode 100755 index d8c528067647b6400a5fdf8d0313c7a2cad70be2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 
zcmeI%K}y3w6adg~6q;Q`dIxdm5xk0v77Ch(i0fXLc8LzHSR`qJ}w(a(SGJN_5SK!5-N0t5&UAV7cs0RjXF z=U(BT*bOt>;5yXkpKY#1PBlyK!5-N0t5)0q`;)BI<@{! w&&|thQOEv|fUf`FR2%*ONxoY}6Cgl<009C72oNAZfB=E9K)0X?8GE&u=k diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.phr b/tests/test_data/outputs/locidex_db/blast/protein/protein.phr deleted file mode 100755 index 5c45aa957a4f0d1cd7818e3a55a2b3d1e2230b5f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1290 zcmajdF%E(-07l`qf(U}5Zl0n=5M50d6B83J(5v()=;$fcK!PObV=7I)*X7q<`^Imj z7UKRsKHJ~;baB_4b!YpzZoHD>`+M{ky7LD7ATZ(!@Cx4FfuDdc!3W?g@FDm;_$l}r zd<6ageg^&sKK}2Op#dfopasq;Kod+UKpV^`KqFjGfL6Gq0L?I`0PS!^0UF|(0<^@< GkH8C-tr(mD diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pin b/tests/test_data/outputs/locidex_db/blast/protein/protein.pin deleted file mode 100755 index d82020df7a61c1140cdf102496cf9ef7e1c9a30b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 280 zcmXw!%L)Nu7>3_5PLU+aO8rSznh}z$MixqmJJ5`wCS%BJ@#8meLIv$;B%j^};L`CDbcq)uh{S5PLV!7$le zIHH^rg?ynTM5S6PidyU25lF04t^@c-KTo6GlB+vf(r6h=j6EP4*^(O|Ga=W>j@$vS zpESsxJh*-cZj%fxHFz=H@Nb;3E+45IQrF@9HV|-t-~|Y!dBhWloPp>Gh`qV$ikC2- GI(z`nvn#>? 
diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pjs b/tests/test_data/outputs/locidex_db/blast/protein/protein.pjs deleted file mode 100755 index fd87ea1..0000000 --- a/tests/test_data/outputs/locidex_db/blast/protein/protein.pjs +++ /dev/null @@ -1,22 +0,0 @@ -{ - "version": "1.2", - "dbname": "protein", - "dbtype": "Protein", - "db-version": 5, - "description": "locidex_db/blast/protein/protein.fasta", - "number-of-letters": 7547, - "number-of-sequences": 20, - "last-updated": "2024-06-06T08:43:00", - "number-of-volumes": 1, - "bytes-total": 46334, - "bytes-to-cache": 7848, - "files": [ - "protein.pdb", - "protein.phr", - "protein.pin", - "protein.pot", - "protein.psq", - "protein.ptf", - "protein.pto" - ] -} diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pot b/tests/test_data/outputs/locidex_db/blast/protein/protein.pot deleted file mode 100755 index 754c9ab7fa1e8f21ae1d5ba10426f2e2f8bdb1c3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 248 zcmb`&w*i14001yMq5>BFzm6_3`Gk)z<}97Nd5mm+Q*q0jk>Fn7qGAI;{4U;dyg8N*1X+{(dC$PK4%?O z4dVa()uK`Nc22k3x|dXmZuxz3ziziBvcuXbJ69$6)7(fS<(zg3az5vH#m>X|U{6@B z^nS(}J*f6Hgy*}`=9*JyDK%vu;s`2n9d~E!LG}t^WMB62KIZ064!sq1(aVyj?gr8P z)e*9kZ-_%6E#k6JAh%cACxU`%;*L8x|ST>&Y$#aZe;Up7YuqI#j0<%2Zk@x7@r-4ei}H z=P52(WlQwyOZ`OU(?2&(*SXeNg&K{%+$dqFYpw}(`Nq=zPG4RrSy{T;$efnGm9|`7 zYv%Zc^2*q}k7`FNWgoB3Rl=c+sqPRi~%E{D>}+;u1~&L!98r#CT@5)o$`8+;`3;) z>07JqGhH6b?aR-_pz99Mt^Na$nOEdSNomR8H2GxRsijZ%%g0U!WTNlk{Yq1x3!I2q z^Ah6eHV?8IFU`OjL`h-vrstqxb61r-Tg`D#$|M_bgaTwJw9j)L@!{m{iwj3i&m2Zr(Z|^4 z=4Vd>#T*{!&7Hm5qbm=oljAsMsN6H?kTbBSTne;8$>D(n3fzKdxW&}NFt_G|)$W8= z{3@>L84G9}=jrxtWgMt!_fE(73>j27&%?FFb!BX4SoK|Pxk&CSgJHK^niy!vFKV5VNb%MH`k1O5+{kYvX7hAUNOVF zNMGc1{7LSh#?6)Eg4^6)&&vC@n2%9E)#BE9We6 zjPU?ZiO>+zaN0nncvQFP0mn#;9}tFHed^oIdi5G}*DlBr@f)F;gF<M2xZ^v$Yd9|h0SwZ*u2X=!r% 
zn^Xuv&YT{)DZ$rOaWFGnd`DUVDhIVtf9orr7?D1p(HUVnjbF$@fv73^l{0g@P0jUP=^>O^5ZJlq&nbG_@qFO9f%zXC@16#Zm>Im&6(KxM1{H=!MxejFMLqAhoLkh zVbNh5ra8x=gYk7kVK`?BG3D;WTV@f?y{2Fj(7yohPpGcVd3u&{dsuAMJ~ae={_o_@Zf31BNJ>2 z%nl-FeAq~rLkYKr0C;Rn&#WDSyK}~`i~xP*@J@z)sUDNYOd8tgGP+8&$B@LEkOhn< zp9HKRtafmo1#U{WhX#>SM4!3zV-6E0KQIpwQpDymuMCSd`>Y8s#UkQ*9^{1_{1xJP#Su2@4{ z{x%DG_n3`Phq_=6NnToZlbAvdy8-si?H?47L@|Ts9~bIQP|#`lb+MA~pO1Pe>Pi8)OlNG&s~FYs0cnP+tp}b(`xsrwqA_!21>pD) zQ8Jxwj7VA>5=(P*a5{|eY@t+3P%d=jf!e_Vj6jCK6{cHt!o^rnK1~S#SVxHRCzP!q zLC_2LSk-521tJU?41C7{zfdACB`Us8@#_2*&~Fd=69P&ppd{vH74zfJrod%35S@ew zwlN6we1`#F4itU)g$9b72phl4f+euRE9l)92p zGs;VD;YNUs0PGTXB0c97NbHpM;ugRPW5~>lhkltMOalNWh0J=t7ADW0xcY!-I;^M< z5F@3-X=`M*2o~)QTmTQ@AV72BQ4o1T99ZMs9F!yFQ62Veegp#kPCzV1Je z-5xzJ6Lt5P0op<#EbFZS>!rjc@tLR<_J_5>su|l_EB7;)bmj%kz)C`?c&vr^$S=3# z&pm9Ljc@o7l2ruuVWwaq@(vR{sXEDWl=Uz~^Z+qogq_RH3c`yKI@~D>2oLW<7XzN~ z^b96vP2vQZ1SSYT;AU9Q3=dK_m|8L2Gf0nOZik#oSi+c%fma(C?X<|$Fgw6xu$woh zh8AJ{!4268oG{l+N$sAU31NvH0N#QOzV~&^YAOpW5llrXHLhdKg)1QqqQh8Fi)6^* z(#O7q=5cRq)I?vfH?Rfzi;J4sV&*_^>RN*WljkQpi_9<*!-)N-Va*dQ{<`nzr)i~a z9dOZrm>fx?P>lFR615H*i2ES4M@x!fL8RR3MGn<-0Z|F%$siz;lL#avTU3D&#Tr5g zG#u{^p3X;xDFnG^;KAB3%P_1tLwZpcW+I!&(q35qVB)B>QKVHxh;r<6c+V=4%c9b- zf~DV|CXpFYEZP={Obe8@jNX}vFg z7E3^YTH~?y=a&Z;KH*3)q|{*7UXeex60E+k=rIy<*ban@F|P>$LDyg-e{i5+laZrZ zi%^K%IXupI>=}G72*gg2vBV@m&uD|1degE<#}!;`HU{1}4tLx^FY%If(Z*D=RV3Xp zEC73=F`VT?A(&h~0waYaa>HCatGG}Y7*2$*L(RMp?*@A*mU)me{pSA+D|(Buqu`#p zCLAIzA6zunTNqhkvB#TU^r&RPP#}N{*LY;MvezTwv$gyR%P5=>>cKj#kIp&(bu5ns z;PGvJwlDB3f~a4@u+m%8URF`zgZ&@p#BgDaptxyU&+hMe><2d4m-WjI=Npi`ZFv`Xn zDKOR?rCXx$urbvkv#?mQB_qRtyup_2_Ba_XZG&I-V?7LR%u_8xuoGjRc*O-hYjV^3AD&E{3*TmmX%#`vDQiulIBC! 
zEV^yh)9f43yb=l%unhVnu*jlIO5UfE`{o zrXiVHh{tD9YIHS`^-0C}1NwtHnDWE-m^zo4DZC+ukv$O$Hj?u5xLSnXwid*@;H}i; zbV4fMsUT~-1`#L8DcgZLp(I#g1-3p2AJZ}z{}UiJBfZ9D7*wJF-(g(HoZJVBdZUnh z!!RSm`k-4V%)Uo~GgwVRN-ztK>u<~^JY`$|l%UWk$K0_G8NoPGzz ztb{Cep;G|;*xL)G!G7WhEZ7mFZF$KGj>8f8n3%%ka#vifeIK$;v9ATlUu*%9Pt?YI zdSjLLc#Q9SDDewpOlR0$@E=`gOU`sd>JTnm$0y)T$!bh!9Hb3=ib@d_96v6WlGHlcj#x<0I@s zJb`^Xt=HmSN4^L*2qzAb*|F6IM8gQOd&J^}F^iN{ho%d@iged^f-sIQH!bK;D3rFAgX;#*Kv}km61_n9Pb6qx36F zSNpn#JGEWm@JVfXEKI-;`W#r(LG)vHZu@T{6>CA85b`Nw22=18+b^KTj*d_EC7UG+ zKcyi|6$=Tzcws|F@!%Z#OND?fttZ#oMu+3s2HB(uxI(BNL-jgK{La3Ft)Gs}A+R?a z?wIyn8YjVcWK(D?Lc@1YSi@p{p+HqW9@Fr1T_$d!a|DnwR0}#*Gvzd|71X z{g6z)-S85T*RV^r-#=duIdzJkyQgkAju_6m?CR~ezpidNTg?3z?>t+V5+Fc;009C7 z2oNAZfB*pkw+qz1`!D+c`t+KWnseOrpYYhnoWK3cJb?fK0t5&UAV7cs0RjXF5J-XS zzayHM#HQV}xgruEK!5-N0t5&UAV7cs0RsO);N@6E6QfwqKVBEpx7F%>xjxnP9Q72v tW~JgB=Xra^(*N(D`o%y diff --git a/tests/test_data/outputs/locidex_db/blast/protein/protein.pto b/tests/test_data/outputs/locidex_db/blast/protein/protein.pto deleted file mode 100755 index 91d3a927c3e718edb4b7b0774fed440a3c193069..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 84 vcmXBF2?~H9002QdP16qZ|8H%;g<*RbO*XTzvau66NMue5m5ZB)m&V5r8>|4+ diff --git a/tests/test_data/outputs/locidex_db/config.json b/tests/test_data/outputs/locidex_db/config.json deleted file mode 100755 index 33bc44e..0000000 --- a/tests/test_data/outputs/locidex_db/config.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "Locidex Test Database", - "db_version": "1.0.0", - "db_date": "2024/06/06", - "db_author": "James Robertson", - "db_desc": "Using Salmonella GCA_000007545.2 as source data", - "db_num_seqs": 20, - "is_nucl": true, - "is_prot": true, - "nucleotide_db_name": "nucleotide", - "protein_db_name": "protein" -} \ No newline at end of file diff --git a/tests/test_data/outputs/locidex_db/meta.json b/tests/test_data/outputs/locidex_db/meta.json deleted file mode 100755 index b0c01b1..0000000 --- 
a/tests/test_data/outputs/locidex_db/meta.json +++ /dev/null @@ -1,455 +0,0 @@ -{ - "info": { - "num_seqs": 20, - "is_cds": "True", - "trans_table": 11, - "dna_min_len": 71.4, - "dna_max_len": 3454.5, - "dna_min_ident": 80, - "aa_min_len": 23.8, - "aa_max_len": 1151.5, - "aa_min_ident": 64 - }, - "meta": { - "0": { - "seq_id": 0, - "locus_name": "locus_1", - "locus_name_alt": "SALM_11273", - "locus_product": "!", - "locus_description": "hypothetical protein", - "locus_uid": "1", - "dna_seq_len": 102, - "dna_seq_hash": "d17b02d12afa7f832ee37df6f24a8f55", - "aa_seq_len": 34, - "aa_seq_hash": "84ac553cb45dd790c497c27152888f02", - "dna_min_len": 71.4, - "dna_max_len": 132.6, - "aa_min_len": 23.8, - "aa_max_len": 44.2, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "1": { - "seq_id": 1, - "locus_name": "locus_2", - "locus_name_alt": "SALM_120", - "locus_product": "@", - "locus_description": "outer membrane protein", - "locus_uid": "B", - "dna_seq_len": 285, - "dna_seq_hash": "e35184c8ff18e9116fc8faef20532f56", - "aa_seq_len": 95, - "aa_seq_hash": "2d9e7f7f0d11bf02db860b0799e7924d", - "dna_min_len": 199.5, - "dna_max_len": 370.5, - "aa_min_len": 66.5, - "aa_max_len": 123.5, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "2": { - "seq_id": 2, - "locus_name": "locus_3", - "locus_name_alt": "SALM_2016", - "locus_product": "#", - "locus_description": "tRNA (guanosine(46)-N7)-methyltransferase TrmB", - "locus_uid": "C", - "dna_seq_len": 327, - "dna_seq_hash": "670705cd2a59c4a23a897ac656a888fe", - "aa_seq_len": 109, - "aa_seq_hash": "4277f8bbf1fd6682001da089fea83d04", - "dna_min_len": 228.9, - "dna_max_len": 425.1, - "aa_min_len": 76.3, - "aa_max_len": 141.7, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - 
"count_int_stops": 0, - "dna_ambig_count": 0 - }, - "3": { - "seq_id": 3, - "locus_name": "locus_4", - "locus_name_alt": "SALM_8644", - "locus_product": "$", - "locus_description": "AZ624_004720", - "locus_uid": "AZ624_004720", - "dna_seq_len": 417, - "dna_seq_hash": "ac1b21798c0f672ad26f5a91ea278590", - "aa_seq_len": 139, - "aa_seq_hash": "0c25367401155278f34832f184ab44e6", - "dna_min_len": 291.9, - "dna_max_len": 542.1, - "aa_min_len": 97.3, - "aa_max_len": 180.7, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "4": { - "seq_id": 4, - "locus_name": "locus_5", - "locus_name_alt": "SALM_1876", - "locus_product": "%", - "locus_description": "SPI-1 type III secretion system invasion lipoprotein InvH", - "locus_uid": "E", - "dna_seq_len": 444, - "dna_seq_hash": "d00defcca8588f21ce16fa1d0ac13389", - "aa_seq_len": 148, - "aa_seq_hash": "c8bf12a8057fc5e541fcd4924136a40d", - "dna_min_len": 310.8, - "dna_max_len": 577.2, - "aa_min_len": 103.6, - "aa_max_len": 192.4, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "5": { - "seq_id": 5, - "locus_name": "locus_6", - "locus_name_alt": "SALM_640", - "locus_product": "^", - "locus_description": "MOSC domain-containing protein", - "locus_uid": "F", - "dna_seq_len": 543, - "dna_seq_hash": "a11561f2804e2c32c78049f8b9aeb517", - "aa_seq_len": 181, - "aa_seq_hash": "1f6a45ea291940ef2c17ec1cfdd5fbdd", - "dna_min_len": 380.1, - "dna_max_len": 705.9, - "aa_min_len": 126.7, - "aa_max_len": 235.3, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "6": { - "seq_id": 6, - "locus_name": "locus_7", - "locus_name_alt": "SALM_1501", - "locus_product": "&", - "locus_description": "India: Vellore", - "locus_uid": "G", - "dna_seq_len": 606, - 
"dna_seq_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", - "aa_seq_len": 202, - "aa_seq_hash": "62252b3326997117f127efb88ff09294", - "dna_min_len": 424.2, - "dna_max_len": 787.8, - "aa_min_len": 141.4, - "aa_max_len": 262.6, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "7": { - "seq_id": 7, - "locus_name": "locus_8", - "locus_name_alt": "SALM_756", - "locus_product": "*", - "locus_description": "DNA polymerase III subunit delta'", - "locus_uid": "H", - "dna_seq_len": 642, - "dna_seq_hash": "7ebe74afecf146ec4db816c8deced64f", - "aa_seq_len": 214, - "aa_seq_hash": "2449629747a7c58f0f2cad411db87178", - "dna_min_len": 449.4, - "dna_max_len": 834.6, - "aa_min_len": 149.8, - "aa_max_len": 278.2, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "8": { - "seq_id": 8, - "locus_name": "locus_9", - "locus_name_alt": "SALM_7353", - "locus_product": "1", - "locus_description": "fimbrial assembly chaperone", - "locus_uid": "I", - "dna_seq_len": 684, - "dna_seq_hash": "41ebb36872854b2b33c8c028e23d8ad1", - "aa_seq_len": 228, - "aa_seq_hash": "a75a93991228940fb46d917f238beb5a", - "dna_min_len": 478.8, - "dna_max_len": 889.2, - "aa_min_len": 159.6, - "aa_max_len": 296.4, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "9": { - "seq_id": 9, - "locus_name": "locus_10", - "locus_name_alt": "SALM_1891", - "locus_product": "200.96", - "locus_description": "5'/3'-nucleotidase SurE", - "locus_uid": "J", - "dna_seq_len": 762, - "dna_seq_hash": "fe04d17ec353c08b903c85fc0ca4dc02", - "aa_seq_len": 254, - "aa_seq_hash": "bd09702e070040e0fc8d2ec3b830812c", - "dna_min_len": 533.4, - "dna_max_len": 990.6, - "aa_min_len": 177.8, - "aa_max_len": 330.2, - "dna_min_ident": 80, - "aa_min_ident": 64, 
- "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "10": { - "seq_id": 10, - "locus_name": "locus_11", - "locus_name_alt": "SALM_1452", - "locus_product": "|", - "locus_description": "1-phosphofructokinase", - "locus_uid": "K", - "dna_seq_len": 858, - "dna_seq_hash": "5b128d659955716833ce42f2bb060212", - "aa_seq_len": 286, - "aa_seq_hash": "4daecf1a6ccae76e1d97a4ce9ee4ff0b", - "dna_min_len": 600.6, - "dna_max_len": 1115.4, - "aa_min_len": 200.2, - "aa_max_len": 371.8, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "11": { - "seq_id": 11, - "locus_name": "locus_12", - "locus_name_alt": "SALM_11020", - "locus_product": "_", - "locus_description": "1", - "locus_uid": "L", - "dna_seq_len": 972, - "dna_seq_hash": "eb72da68c159497d5f0c8eeddc51b5ae", - "aa_seq_len": 324, - "aa_seq_hash": "a1f16dd269dc295e715f2d00f9c26b43", - "dna_min_len": 680.4, - "dna_max_len": 1263.6, - "aa_min_len": 226.8, - "aa_max_len": 421.2, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "12": { - "seq_id": 12, - "locus_name": "locus_13", - "locus_name_alt": "SALM_1934", - "locus_product": "-", - "locus_description": "3.9", - "locus_uid": "M", - "dna_seq_len": 1098, - "dna_seq_hash": "8f300259dcb46224bdc1fe5273107324", - "aa_seq_len": 366, - "aa_seq_hash": "b16bc26e42f5bc4327504b1ec8b2d53d", - "dna_min_len": 768.6, - "dna_max_len": 1427.4, - "aa_min_len": 256.2, - "aa_max_len": 475.8, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "13": { - "seq_id": 13, - "locus_name": "locus_14", - "locus_name_alt": "SALM_2871", - "locus_product": "+", - "locus_description": "@", - "locus_uid": "N", - "dna_seq_len": 1281, - "dna_seq_hash": 
"b9060019038526aa6fc38d2f7510edc6", - "aa_seq_len": 427, - "aa_seq_hash": "ed11561b2bedaa12c7a28eb0e9346101", - "dna_min_len": 896.7, - "dna_max_len": 1665.3, - "aa_min_len": 298.9, - "aa_max_len": 555.1, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "14": { - "seq_id": 14, - "locus_name": "locus_15", - "locus_name_alt": "SALM_583", - "locus_product": "=", - "locus_description": "DMT family transporter", - "locus_uid": "O", - "dna_seq_len": 1434, - "dna_seq_hash": "bc98c2fe196a68a79036814396513a8d", - "aa_seq_len": 478, - "aa_seq_hash": "16cb2acec887d5861327e04f2705b8ce", - "dna_min_len": 1003.8, - "dna_max_len": 1864.2, - "aa_min_len": 334.6, - "aa_max_len": 621.4, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "15": { - "seq_id": 15, - "locus_name": "locus_16", - "locus_name_alt": "SALM_780", - "locus_product": "<", - "locus_description": "murein transglycosylase A", - "locus_uid": "P", - "dna_seq_len": 1464, - "dna_seq_hash": "16e55766c603fe33c9e75d8e81743ae2", - "aa_seq_len": 488, - "aa_seq_hash": "b20314e55f9713235e9c4ea5817b56df", - "dna_min_len": 1024.8, - "dna_max_len": 1903.2, - "aa_min_len": 341.6, - "aa_max_len": 634.4, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "16": { - "seq_id": 16, - "locus_name": "locus_17", - "locus_name_alt": "SALM_1937", - "locus_product": ">", - "locus_description": "GTPase HflX", - "locus_uid": "Q", - "dna_seq_len": 1836, - "dna_seq_hash": "a0d97d985483413f3c18bfe5833ae9ce", - "aa_seq_len": 612, - "aa_seq_hash": "d6012eddc7a6e8d7d40761d00ed71a5a", - "dna_min_len": 1285.2, - "dna_max_len": 2386.8, - "aa_min_len": 428.4, - "aa_max_len": 795.6, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - 
"min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "17": { - "seq_id": 17, - "locus_name": "locus_18", - "locus_name_alt": "SALM_1997", - "locus_product": "?", - "locus_description": "biosynthetic arginine decarboxylase", - "locus_uid": "R", - "dna_seq_len": 1914, - "dna_seq_hash": "b3021e979faa7600756c06dfadfcf14c", - "aa_seq_len": 638, - "aa_seq_hash": "e68e83956ee1d4c685571e5348c8def1", - "dna_min_len": 1339.8, - "dna_max_len": 2488.2, - "aa_min_len": 446.6, - "aa_max_len": 829.4, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "18": { - "seq_id": 18, - "locus_name": "locus_19", - "locus_name_alt": "SALM_9926", - "locus_product": ",", - "locus_description": "https://www.ncbi.nlm.nih.gov/nucleotide/CP053702.1?report=genbank&log$=nuclalign&blast_rank=1&RID=63EMBCM3013&from=74626&to=76662", - "locus_uid": "S", - "dna_seq_len": 2037, - "dna_seq_hash": "a012eee23637b48e39b00808a057e35d", - "aa_seq_len": 679, - "aa_seq_hash": "1302fea1dbd5bb756db34e09b863ad44", - "dna_min_len": 1425.9, - "dna_max_len": 2648.1, - "aa_min_len": 475.3, - "aa_max_len": 882.7, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - }, - "19": { - "seq_id": 19, - "locus_name": "locus_20", - "locus_name_alt": "SALM_6064", - "locus_product": ".", - "locus_description": "alpha-2-macroglobulin family protein", - "locus_uid": "T", - "dna_seq_len": 4935, - "dna_seq_hash": "4461918e985715e4a2b07494e1f91326", - "aa_seq_len": 1645, - "aa_seq_hash": "12ff39a1933fa478729b142976b0a659", - "dna_min_len": 3454.5, - "dna_max_len": 6415.5, - "aa_min_len": 1151.5, - "aa_max_len": 2138.5, - "dna_min_ident": 80, - "aa_min_ident": 64, - "min_dna_match_cov": 80, - "min_aa_match_cov": 80, - "count_int_stops": 0, - "dna_ambig_count": 0 - } - } -} \ No newline at end of file diff --git 
a/tests/test_data/outputs/locidex_db/results.json b/tests/test_data/outputs/locidex_db/results.json deleted file mode 100755 index 509bbcc..0000000 --- a/tests/test_data/outputs/locidex_db/results.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "analysis_start_time": "2024-06-06 08:43:01", - "parameters": { - "input_file": "locidex.loci.txt", - "outdir": "locidex_db", - "name": "test", - "author": "James", - "date": "", - "db_ver": "1.0.0", - "db_desc": "desc", - "force": false - }, - "result_file": "locidex_db", - "analysis_end_time": "2024-06-06 08:43:03" -} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/conservative/profile.tsv b/tests/test_data/outputs/merge/conservative/profile.tsv deleted file mode 100755 index 6082f0b..0000000 --- a/tests/test_data/outputs/merge/conservative/profile.tsv +++ /dev/null @@ -1,15 +0,0 @@ -sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 -G10 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 - eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G12 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 
fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 - 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G13 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae - 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G5 e9e707ebc64e10a881f1323ebff85369 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G7 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 796419469778f7ec3851c813f59cfff7 c4266f2f24fdd8e039113c6b0955af9f 
eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G8 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fd6284b58a891cf02058906c9ee37a00 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G1 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G2 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 
eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G6 a47cc24760462371e919143c5cc81376 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G9 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 - c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G14 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae - 
b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G4 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G3 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G11 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f - 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 
bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json b/tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json deleted file mode 100755 index 476c71a..0000000 --- a/tests/test_data/outputs/merge/conservative/profile_dists/allele_map.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "locus_1": { - "a47cc24760462371e919143c5cc81376": 1, - "d17b02d12afa7f832ee37df6f24a8f55": 2, - "e9e707ebc64e10a881f1323ebff85369": 3 - }, - "locus_2": { - "8b70e777f6bbf2c91ff75947824b5976": 1, - "e35184c8ff18e9116fc8faef20532f56": 2 - }, - "locus_3": { - "670705cd2a59c4a23a897ac656a888fe": 1 - }, - "locus_4": { - "73790840c76943caac0ebb3b2b3f0b98": 1, - "ac1b21798c0f672ad26f5a91ea278590": 2 - }, - "locus_5": { - "8cf4341689dd00f74adfcc43d1f4a35e": 1, - "d00defcca8588f21ce16fa1d0ac13389": 2 - }, - "locus_6": { - "a11561f2804e2c32c78049f8b9aeb517": 1 - }, - "locus_7": { - "49d9878c9d3071aa1d2f26cb947b784c": 1, - "dc94bf1ec4ff9bed2a1f460cbd958656": 2 - }, - "locus_8": { - "7ebe74afecf146ec4db816c8deced64f": 1 - }, - "locus_9": { - "41ebb36872854b2b33c8c028e23d8ad1": 1 - }, - "locus_10": { - "0": 0, - "796419469778f7ec3851c813f59cfff7": 1, - "fd6284b58a891cf02058906c9ee37a00": 2, - "fe04d17ec353c08b903c85fc0ca4dc02": 3 - }, - "locus_11": { - "0": 0, - "5b128d659955716833ce42f2bb060212": 1, - "c4266f2f24fdd8e039113c6b0955af9f": 2 - }, - "locus_12": { - "0": 0, - "eb72da68c159497d5f0c8eeddc51b5ae": 1 - }, - "locus_13": { - "0": 0, - "8f300259dcb46224bdc1fe5273107324": 1 - }, - "locus_14": { - "2fa0b06ed72e36b4071cab9d0b4f87d0": 1, - "b9060019038526aa6fc38d2f7510edc6": 2 - }, - "locus_15": { - "bc98c2fe196a68a79036814396513a8d": 1 - }, - "locus_16": { - "16e55766c603fe33c9e75d8e81743ae2": 1, - "a9b3cb97dac3cda6e932a49bf9a507bd": 2 - }, - "locus_17": { - 
"a0d97d985483413f3c18bfe5833ae9ce": 1 - }, - "locus_18": { - "b3021e979faa7600756c06dfadfcf14c": 1 - }, - "locus_19": { - "a012eee23637b48e39b00808a057e35d": 1, - "de32372598811d63bcc1a0eaf6872644": 2 - }, - "locus_20": { - "4461918e985715e4a2b07494e1f91326": 1 - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text b/tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text deleted file mode 100755 index 19baea7..0000000 --- a/tests/test_data/outputs/merge/conservative/profile_dists/query_profile.text +++ /dev/null @@ -1,15 +0,0 @@ -sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 -G10 2 2 1 2 2 1 2 1 1 3 0 1 1 2 1 1 1 1 1 1 -G12 2 2 1 2 2 1 2 1 1 3 1 0 1 2 1 1 1 1 1 1 -G13 2 1 1 1 1 1 1 1 1 3 2 1 0 1 1 2 1 1 2 1 -G5 3 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 -G7 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 -G8 2 2 1 2 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 -G1 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 -G2 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 -G6 1 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 -G9 2 1 1 1 1 1 1 1 1 0 2 1 1 1 1 2 1 1 2 1 -G14 2 2 1 2 2 1 2 1 1 3 1 1 0 2 1 1 1 1 1 1 -G4 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 -G3 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 -G11 2 1 1 1 1 1 1 1 1 3 2 0 1 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text b/tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text deleted file mode 100755 index 19baea7..0000000 --- a/tests/test_data/outputs/merge/conservative/profile_dists/ref_profile.text +++ /dev/null @@ -1,15 +0,0 @@ -sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 -G10 2 2 1 2 2 1 2 1 1 3 0 1 1 2 1 1 1 1 1 1 -G12 2 2 1 2 2 1 2 1 1 
3 1 0 1 2 1 1 1 1 1 1 -G13 2 1 1 1 1 1 1 1 1 3 2 1 0 1 1 2 1 1 2 1 -G5 3 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 -G7 2 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 2 1 -G8 2 2 1 2 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 -G1 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 -G2 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 -G6 1 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 -G9 2 1 1 1 1 1 1 1 1 0 2 1 1 1 1 2 1 1 2 1 -G14 2 2 1 2 2 1 2 1 1 3 1 1 0 2 1 1 1 1 1 1 -G4 2 2 1 2 2 1 2 1 1 3 1 1 1 2 1 1 1 1 1 1 -G3 2 1 1 1 1 1 1 1 1 3 2 1 1 1 1 2 1 1 2 1 -G11 2 1 1 1 1 1 1 1 1 3 2 0 1 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/results.text b/tests/test_data/outputs/merge/conservative/profile_dists/results.text deleted file mode 100755 index b27a212..0000000 --- a/tests/test_data/outputs/merge/conservative/profile_dists/results.text +++ /dev/null @@ -1,15 +0,0 @@ -dists G10 G12 G13 G5 G7 G8 G1 G2 G6 G9 G14 G4 G3 G11 -G10 0 0 7 8 8 1 7 0 1 7 0 0 7 7 -G12 0 0 8 9 9 1 8 0 1 8 0 0 8 8 -G13 7 8 0 1 1 9 0 8 9 0 8 8 0 0 -G5 8 9 1 0 2 10 1 9 9 1 9 9 1 1 -G7 8 9 1 2 0 9 1 9 10 0 9 9 1 1 -G8 1 1 9 10 9 0 9 1 2 8 1 1 9 9 -G1 7 8 0 1 1 9 0 8 9 0 8 8 0 0 -G2 0 0 8 9 9 1 8 0 1 8 0 0 8 8 -G6 1 1 9 9 10 2 9 1 0 9 1 1 9 9 -G9 7 8 0 1 0 8 0 8 9 0 8 8 0 0 -G14 0 0 8 9 9 1 8 0 1 8 0 0 8 8 -G4 0 0 8 9 9 1 8 0 1 8 0 0 8 8 -G3 7 8 0 1 1 9 0 8 9 0 8 8 0 0 -G11 7 8 0 1 1 9 0 8 9 0 8 8 0 0 diff --git a/tests/test_data/outputs/merge/conservative/profile_dists/run.json b/tests/test_data/outputs/merge/conservative/profile_dists/run.json deleted file mode 100755 index d548f75..0000000 --- a/tests/test_data/outputs/merge/conservative/profile_dists/run.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "profile_dists": "version: 1.0.0", - "analysis_start_time": "10/06/2024 11:14:41", - "analysis_end_time": "10/06/2024 11:14:48", - "parameters": { - "query": "locidex/merge/conservative/profile.tsv", - "ref": "locidex/merge/conservative/profile.tsv", - "outdir": "locidex/merge/conservative/profile_dists", - "outfmt": 
"matrix", - "file_type": "text", - "distm": "hamming", - "missing_thresh": 1.0, - "sample_qual_thresh": 1.0, - "match_threshold": -1, - "mapping_file": null, - "batch_size": null, - "max_mem": null, - "force": false, - "skip": false, - "columns": null, - "count_missing": false, - "cpus": 1 - }, - "query_profile_info": { - "num_samples": 14, - "num_samples_pass": 14, - "failed_samples": [], - "parsed_file_path": "locidex/merge/conservative/profile_dists/query_profile.text" - }, - "ref_profile_info": { - "num_samples": 14, - "num_samples_pass": 14, - "failed_samples": [], - "parsed_file_path": "locidex/merge/conservative/profile_dists/ref_profile.text" - }, - "loci_removed": [], - "result_file": "locidex/merge/conservative/profile_dists/results.text" -} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/normal/profile.tsv b/tests/test_data/outputs/merge/normal/profile.tsv deleted file mode 100755 index 245ca6f..0000000 --- a/tests/test_data/outputs/merge/normal/profile.tsv +++ /dev/null @@ -1,15 +0,0 @@ -sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 -G10 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 - eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G12 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 
d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 60934464690fea26102d1c8c9acb755d 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G13 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 70e88b95c11c37150f37312882af5771 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G5 e9e707ebc64e10a881f1323ebff85369 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G7 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 
8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 796419469778f7ec3851c813f59cfff7 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G8 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fd6284b58a891cf02058906c9ee37a00 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G1 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G2 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 
d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G6 a47cc24760462371e919143c5cc81376 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G9 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 - c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G14 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 
a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 70e88b95c11c37150f37312882af5771 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G4 d17b02d12afa7f832ee37df6f24a8f55 e35184c8ff18e9116fc8faef20532f56 670705cd2a59c4a23a897ac656a888fe ac1b21798c0f672ad26f5a91ea278590 d00defcca8588f21ce16fa1d0ac13389 a11561f2804e2c32c78049f8b9aeb517 dc94bf1ec4ff9bed2a1f460cbd958656 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 5b128d659955716833ce42f2bb060212 eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 b9060019038526aa6fc38d2f7510edc6 bc98c2fe196a68a79036814396513a8d 16e55766c603fe33c9e75d8e81743ae2 a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c a012eee23637b48e39b00808a057e35d 4461918e985715e4a2b07494e1f91326 -G3 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f eb72da68c159497d5f0c8eeddc51b5ae 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 -G11 d17b02d12afa7f832ee37df6f24a8f55 8b70e777f6bbf2c91ff75947824b5976 670705cd2a59c4a23a897ac656a888fe 73790840c76943caac0ebb3b2b3f0b98 8cf4341689dd00f74adfcc43d1f4a35e 
a11561f2804e2c32c78049f8b9aeb517 49d9878c9d3071aa1d2f26cb947b784c 7ebe74afecf146ec4db816c8deced64f 41ebb36872854b2b33c8c028e23d8ad1 fe04d17ec353c08b903c85fc0ca4dc02 c4266f2f24fdd8e039113c6b0955af9f 60934464690fea26102d1c8c9acb755d 8f300259dcb46224bdc1fe5273107324 2fa0b06ed72e36b4071cab9d0b4f87d0 bc98c2fe196a68a79036814396513a8d a9b3cb97dac3cda6e932a49bf9a507bd a0d97d985483413f3c18bfe5833ae9ce b3021e979faa7600756c06dfadfcf14c de32372598811d63bcc1a0eaf6872644 4461918e985715e4a2b07494e1f91326 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/allele_map.json b/tests/test_data/outputs/merge/normal/profile_dists/allele_map.json deleted file mode 100755 index 2d1feaa..0000000 --- a/tests/test_data/outputs/merge/normal/profile_dists/allele_map.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "locus_1": { - "a47cc24760462371e919143c5cc81376": 1, - "d17b02d12afa7f832ee37df6f24a8f55": 2, - "e9e707ebc64e10a881f1323ebff85369": 3 - }, - "locus_2": { - "8b70e777f6bbf2c91ff75947824b5976": 1, - "e35184c8ff18e9116fc8faef20532f56": 2 - }, - "locus_3": { - "670705cd2a59c4a23a897ac656a888fe": 1 - }, - "locus_4": { - "73790840c76943caac0ebb3b2b3f0b98": 1, - "ac1b21798c0f672ad26f5a91ea278590": 2 - }, - "locus_5": { - "8cf4341689dd00f74adfcc43d1f4a35e": 1, - "d00defcca8588f21ce16fa1d0ac13389": 2 - }, - "locus_6": { - "a11561f2804e2c32c78049f8b9aeb517": 1 - }, - "locus_7": { - "49d9878c9d3071aa1d2f26cb947b784c": 1, - "dc94bf1ec4ff9bed2a1f460cbd958656": 2 - }, - "locus_8": { - "7ebe74afecf146ec4db816c8deced64f": 1 - }, - "locus_9": { - "41ebb36872854b2b33c8c028e23d8ad1": 1 - }, - "locus_10": { - "0": 0, - "796419469778f7ec3851c813f59cfff7": 1, - "fd6284b58a891cf02058906c9ee37a00": 2, - "fe04d17ec353c08b903c85fc0ca4dc02": 3 - }, - "locus_11": { - "0": 0, - "5b128d659955716833ce42f2bb060212": 1, - "c4266f2f24fdd8e039113c6b0955af9f": 2 - }, - "locus_12": { - "60934464690fea26102d1c8c9acb755d": 1, - "eb72da68c159497d5f0c8eeddc51b5ae": 2 - }, - "locus_13": { - 
"70e88b95c11c37150f37312882af5771": 1, - "8f300259dcb46224bdc1fe5273107324": 2 - }, - "locus_14": { - "2fa0b06ed72e36b4071cab9d0b4f87d0": 1, - "b9060019038526aa6fc38d2f7510edc6": 2 - }, - "locus_15": { - "bc98c2fe196a68a79036814396513a8d": 1 - }, - "locus_16": { - "16e55766c603fe33c9e75d8e81743ae2": 1, - "a9b3cb97dac3cda6e932a49bf9a507bd": 2 - }, - "locus_17": { - "a0d97d985483413f3c18bfe5833ae9ce": 1 - }, - "locus_18": { - "b3021e979faa7600756c06dfadfcf14c": 1 - }, - "locus_19": { - "a012eee23637b48e39b00808a057e35d": 1, - "de32372598811d63bcc1a0eaf6872644": 2 - }, - "locus_20": { - "4461918e985715e4a2b07494e1f91326": 1 - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/merge/normal/profile_dists/query_profile.text b/tests/test_data/outputs/merge/normal/profile_dists/query_profile.text deleted file mode 100755 index 27c9f25..0000000 --- a/tests/test_data/outputs/merge/normal/profile_dists/query_profile.text +++ /dev/null @@ -1,15 +0,0 @@ -sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 -G10 2 2 1 2 2 1 2 1 1 3 0 2 2 2 1 1 1 1 1 1 -G12 2 2 1 2 2 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 -G13 2 1 1 1 1 1 1 1 1 3 2 2 1 1 1 2 1 1 2 1 -G5 3 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 -G7 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 1 2 1 -G8 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 1 1 1 1 -G1 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 -G2 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 -G6 1 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 -G9 2 1 1 1 1 1 1 1 1 0 2 2 2 1 1 2 1 1 2 1 -G14 2 2 1 2 2 1 2 1 1 3 1 2 1 2 1 1 1 1 1 1 -G4 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 -G3 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 -G11 2 1 1 1 1 1 1 1 1 3 2 1 2 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text b/tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text deleted file mode 100755 index 27c9f25..0000000 --- 
a/tests/test_data/outputs/merge/normal/profile_dists/ref_profile.text +++ /dev/null @@ -1,15 +0,0 @@ -sample_id locus_1 locus_2 locus_3 locus_4 locus_5 locus_6 locus_7 locus_8 locus_9 locus_10 locus_11 locus_12 locus_13 locus_14 locus_15 locus_16 locus_17 locus_18 locus_19 locus_20 -G10 2 2 1 2 2 1 2 1 1 3 0 2 2 2 1 1 1 1 1 1 -G12 2 2 1 2 2 1 2 1 1 3 1 1 2 2 1 1 1 1 1 1 -G13 2 1 1 1 1 1 1 1 1 3 2 2 1 1 1 2 1 1 2 1 -G5 3 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 -G7 2 1 1 1 1 1 1 1 1 1 2 2 2 1 1 2 1 1 2 1 -G8 2 2 1 2 2 1 2 1 1 2 1 2 2 2 1 1 1 1 1 1 -G1 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 -G2 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 -G6 1 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 -G9 2 1 1 1 1 1 1 1 1 0 2 2 2 1 1 2 1 1 2 1 -G14 2 2 1 2 2 1 2 1 1 3 1 2 1 2 1 1 1 1 1 1 -G4 2 2 1 2 2 1 2 1 1 3 1 2 2 2 1 1 1 1 1 1 -G3 2 1 1 1 1 1 1 1 1 3 2 2 2 1 1 2 1 1 2 1 -G11 2 1 1 1 1 1 1 1 1 3 2 1 2 1 1 2 1 1 2 1 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/results.text b/tests/test_data/outputs/merge/normal/profile_dists/results.text deleted file mode 100755 index 474eec8..0000000 --- a/tests/test_data/outputs/merge/normal/profile_dists/results.text +++ /dev/null @@ -1,15 +0,0 @@ -dists G10 G12 G13 G5 G7 G8 G1 G2 G6 G9 G14 G4 G3 G11 -G10 0 1 8 8 8 1 7 0 1 7 1 0 7 8 -G12 1 0 10 10 10 2 9 1 2 9 2 1 9 8 -G13 8 10 0 2 2 10 1 9 10 1 8 9 1 2 -G5 8 10 2 0 2 10 1 9 9 1 10 9 1 2 -G7 8 10 2 2 0 9 1 9 10 0 10 9 1 2 -G8 1 2 10 10 9 0 9 1 2 8 2 1 9 10 -G1 7 9 1 1 1 9 0 8 9 0 9 8 0 1 -G2 0 1 9 9 9 1 8 0 1 8 1 0 8 9 -G6 1 2 10 9 10 2 9 1 0 9 2 1 9 10 -G9 7 9 1 1 0 8 0 8 9 0 9 8 0 1 -G14 1 2 8 10 10 2 9 1 2 9 0 1 9 10 -G4 0 1 9 9 9 1 8 0 1 8 1 0 8 9 -G3 7 9 1 1 1 9 0 8 9 0 9 8 0 1 -G11 8 8 2 2 2 10 1 9 10 1 10 9 1 0 diff --git a/tests/test_data/outputs/merge/normal/profile_dists/run.json b/tests/test_data/outputs/merge/normal/profile_dists/run.json deleted file mode 100755 index f64714f..0000000 --- a/tests/test_data/outputs/merge/normal/profile_dists/run.json +++ /dev/null @@ -1,38 
+0,0 @@ -{ - "profile_dists": "version: 1.0.0", - "analysis_start_time": "10/06/2024 11:14:29", - "analysis_end_time": "10/06/2024 11:14:37", - "parameters": { - "query": "locidex/merge/normal/profile.tsv", - "ref": "locidex/merge/normal/profile.tsv", - "outdir": "locidex/merge/normal/profile_dists", - "outfmt": "matrix", - "file_type": "text", - "distm": "hamming", - "missing_thresh": 1.0, - "sample_qual_thresh": 1.0, - "match_threshold": -1, - "mapping_file": null, - "batch_size": null, - "max_mem": null, - "force": false, - "skip": false, - "columns": null, - "count_missing": false, - "cpus": 1 - }, - "query_profile_info": { - "num_samples": 14, - "num_samples_pass": 14, - "failed_samples": [], - "parsed_file_path": "locidex/merge/normal/profile_dists/query_profile.text" - }, - "ref_profile_info": { - "num_samples": 14, - "num_samples_pass": 14, - "failed_samples": [], - "parsed_file_path": "locidex/merge/normal/profile_dists/ref_profile.text" - }, - "loci_removed": [], - "result_file": "locidex/merge/normal/profile_dists/results.text" -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G1/profile.json b/tests/test_data/outputs/report/conservative/G1/profile.json deleted file mode 100755 index 3f5fdd7..0000000 --- a/tests/test_data/outputs/report/conservative/G1/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G1": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": 
"8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G10/profile.json b/tests/test_data/outputs/report/conservative/G10/profile.json deleted file mode 100755 index 3c394e9..0000000 --- a/tests/test_data/outputs/report/conservative/G10/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G10": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "-", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G11/profile.json b/tests/test_data/outputs/report/conservative/G11/profile.json deleted file mode 100755 index d6cf3fd..0000000 --- a/tests/test_data/outputs/report/conservative/G11/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - 
"G11": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "-", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G12/profile.json b/tests/test_data/outputs/report/conservative/G12/profile.json deleted file mode 100755 index c07fbc4..0000000 --- a/tests/test_data/outputs/report/conservative/G12/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G12": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "-", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": 
"bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G13/profile.json b/tests/test_data/outputs/report/conservative/G13/profile.json deleted file mode 100755 index e7435f9..0000000 --- a/tests/test_data/outputs/report/conservative/G13/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G13": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "-", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G14/profile.json b/tests/test_data/outputs/report/conservative/G14/profile.json deleted file mode 100755 index 7295749..0000000 --- a/tests/test_data/outputs/report/conservative/G14/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G14": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": 
"e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "-", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G2/profile.json b/tests/test_data/outputs/report/conservative/G2/profile.json deleted file mode 100755 index 3b8c03f..0000000 --- a/tests/test_data/outputs/report/conservative/G2/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G2": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": 
"16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G3/profile.json b/tests/test_data/outputs/report/conservative/G3/profile.json deleted file mode 100755 index fbbe3af..0000000 --- a/tests/test_data/outputs/report/conservative/G3/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G3": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G4/profile.json b/tests/test_data/outputs/report/conservative/G4/profile.json deleted file mode 100755 index f5ece9e..0000000 --- a/tests/test_data/outputs/report/conservative/G4/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G4": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": 
"670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G5/profile.json b/tests/test_data/outputs/report/conservative/G5/profile.json deleted file mode 100755 index 5afae24..0000000 --- a/tests/test_data/outputs/report/conservative/G5/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G5": { - "locus_1": "e9e707ebc64e10a881f1323ebff85369", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - 
"locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G6/profile.json b/tests/test_data/outputs/report/conservative/G6/profile.json deleted file mode 100755 index 2b54aeb..0000000 --- a/tests/test_data/outputs/report/conservative/G6/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G6": { - "locus_1": "a47cc24760462371e919143c5cc81376", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G7/profile.json b/tests/test_data/outputs/report/conservative/G7/profile.json deleted file mode 100755 index 601f66d..0000000 --- a/tests/test_data/outputs/report/conservative/G7/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G7": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - 
"locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "796419469778f7ec3851c813f59cfff7", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G8/profile.json b/tests/test_data/outputs/report/conservative/G8/profile.json deleted file mode 100755 index 2386944..0000000 --- a/tests/test_data/outputs/report/conservative/G8/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G8": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fd6284b58a891cf02058906c9ee37a00", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": 
"a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G9/profile.json b/tests/test_data/outputs/report/conservative/G9/profile.json deleted file mode 100755 index defcdbe..0000000 --- a/tests/test_data/outputs/report/conservative/G9/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G9": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "-", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G1/profile.json b/tests/test_data/outputs/report/normal/G1/profile.json deleted file mode 100755 index 3f5fdd7..0000000 --- a/tests/test_data/outputs/report/normal/G1/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G1": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": 
"8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G10/profile.json b/tests/test_data/outputs/report/normal/G10/profile.json deleted file mode 100755 index 3c394e9..0000000 --- a/tests/test_data/outputs/report/normal/G10/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G10": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "-", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": 
"a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G11/profile.json b/tests/test_data/outputs/report/normal/G11/profile.json deleted file mode 100755 index 65802a8..0000000 --- a/tests/test_data/outputs/report/normal/G11/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G11": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "60934464690fea26102d1c8c9acb755d", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G12/profile.json b/tests/test_data/outputs/report/normal/G12/profile.json deleted file mode 100755 index 5b138f1..0000000 --- a/tests/test_data/outputs/report/normal/G12/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G12": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - 
"locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "60934464690fea26102d1c8c9acb755d", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G13/profile.json b/tests/test_data/outputs/report/normal/G13/profile.json deleted file mode 100755 index 29e62b3..0000000 --- a/tests/test_data/outputs/report/normal/G13/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G13": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "70e88b95c11c37150f37312882af5771", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - 
} -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G14/profile.json b/tests/test_data/outputs/report/normal/G14/profile.json deleted file mode 100755 index 8752d17..0000000 --- a/tests/test_data/outputs/report/normal/G14/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G14": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "70e88b95c11c37150f37312882af5771", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G2/profile.json b/tests/test_data/outputs/report/normal/G2/profile.json deleted file mode 100755 index 3b8c03f..0000000 --- a/tests/test_data/outputs/report/normal/G2/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G2": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": 
"7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G3/profile.json b/tests/test_data/outputs/report/normal/G3/profile.json deleted file mode 100755 index fbbe3af..0000000 --- a/tests/test_data/outputs/report/normal/G3/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G3": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git 
a/tests/test_data/outputs/report/normal/G4/profile.json b/tests/test_data/outputs/report/normal/G4/profile.json deleted file mode 100755 index f5ece9e..0000000 --- a/tests/test_data/outputs/report/normal/G4/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G4": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G5/profile.json b/tests/test_data/outputs/report/normal/G5/profile.json deleted file mode 100755 index 5afae24..0000000 --- a/tests/test_data/outputs/report/normal/G5/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G5": { - "locus_1": "e9e707ebc64e10a881f1323ebff85369", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": 
"41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G6/profile.json b/tests/test_data/outputs/report/normal/G6/profile.json deleted file mode 100755 index 2b54aeb..0000000 --- a/tests/test_data/outputs/report/normal/G6/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G6": { - "locus_1": "a47cc24760462371e919143c5cc81376", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", - "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G7/profile.json 
b/tests/test_data/outputs/report/normal/G7/profile.json deleted file mode 100755 index 601f66d..0000000 --- a/tests/test_data/outputs/report/normal/G7/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G7": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "796419469778f7ec3851c813f59cfff7", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G8/profile.json b/tests/test_data/outputs/report/normal/G8/profile.json deleted file mode 100755 index 2386944..0000000 --- a/tests/test_data/outputs/report/normal/G8/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G8": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "e35184c8ff18e9116fc8faef20532f56", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "ac1b21798c0f672ad26f5a91ea278590", - "locus_5": "d00defcca8588f21ce16fa1d0ac13389", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "fd6284b58a891cf02058906c9ee37a00", 
- "locus_11": "5b128d659955716833ce42f2bb060212", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "b9060019038526aa6fc38d2f7510edc6", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "16e55766c603fe33c9e75d8e81743ae2", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "a012eee23637b48e39b00808a057e35d", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G9/profile.json b/tests/test_data/outputs/report/normal/G9/profile.json deleted file mode 100755 index defcdbe..0000000 --- a/tests/test_data/outputs/report/normal/G9/profile.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "G9": { - "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", - "locus_2": "8b70e777f6bbf2c91ff75947824b5976", - "locus_3": "670705cd2a59c4a23a897ac656a888fe", - "locus_4": "73790840c76943caac0ebb3b2b3f0b98", - "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", - "locus_6": "a11561f2804e2c32c78049f8b9aeb517", - "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", - "locus_8": "7ebe74afecf146ec4db816c8deced64f", - "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", - "locus_10": "-", - "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", - "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", - "locus_13": "8f300259dcb46224bdc1fe5273107324", - "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", - "locus_15": "bc98c2fe196a68a79036814396513a8d", - "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", - "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", - "locus_18": "b3021e979faa7600756c06dfadfcf14c", - "locus_19": "de32372598811d63bcc1a0eaf6872644", - "locus_20": "4461918e985715e4a2b07494e1f91326" - } -} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt deleted file mode 100755 index cdab2ba..0000000 --- 
a/tests/test_data/outputs/search/G1/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 -15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta deleted file mode 100755 index ce3c3a9..0000000 --- a/tests/test_data/outputs/search/G1/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 
-atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 -gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 
-atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 -atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 -ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->15 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G1/blast/protein/hsps.txt b/tests/test_data/outputs/search/G1/blast/protein/hsps.txt deleted file mode 100755 index 233979d..0000000 --- a/tests/test_data/outputs/search/G1/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G1/blast/protein/queries.fasta b/tests/test_data/outputs/search/G1/blast/protein/queries.fasta deleted file mode 100755 index 3ac162b..0000000 --- a/tests/test_data/outputs/search/G1/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->15 
-MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G1/run.json b/tests/test_data/outputs/search/G1/run.json index c036a0b..601ece0 100755 --- a/tests/test_data/outputs/search/G1/run.json +++ b/tests/test_data/outputs/search/G1/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:11:32", + "analysis_start_time": "19/06/2024 10:43:33", "parameters": { - "query": "locidex/extract/G1/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G1", + "command": "search", + "query": "test_dev/extract/G1/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G1", + "db": "test_set/db", + "db_group": null, "name": "G1", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G1/seq_store.json", - "analysis_end_time": "10/06/2024 11:11:34" + "result_file": "test_dev/search/G1/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:34" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G1/seq_store.json b/tests/test_data/outputs/search/G1/seq_store.json index c96c860..89b454a 100755 --- a/tests/test_data/outputs/search/G1/seq_store.json +++ b/tests/test_data/outputs/search/G1/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt deleted file mode 100755 index d91fbd6..0000000 --- a/tests/test_data/outputs/search/G10/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,19 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -3 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -4 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -5 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -6 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -7 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -8 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -9 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -10 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -11 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -12 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -13 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -14 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 -15 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -16 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -17 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -18 
8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta deleted file mode 100755 index 2bab250..0000000 --- a/tests/test_data/outputs/search/G10/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,38 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->3 -atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->4 -ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->5 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->6 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->7 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->8 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->9 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->10 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->11 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->12 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->13 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->14 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->15 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->16 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->17 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->18 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G10/blast/protein/hsps.txt b/tests/test_data/outputs/search/G10/blast/protein/hsps.txt deleted file mode 100755 index e6894a0..0000000 --- a/tests/test_data/outputs/search/G10/blast/protein/hsps.txt +++ /dev/null @@ -1,19 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -3 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -4 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -5 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -6 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -7 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -8 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -9 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -10 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -11 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -12 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -13 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -14 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -15 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -16 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -17 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -18 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G10/blast/protein/queries.fasta b/tests/test_data/outputs/search/G10/blast/protein/queries.fasta deleted file mode 100755 index 4396cd1..0000000 --- a/tests/test_data/outputs/search/G10/blast/protein/queries.fasta +++ /dev/null @@ -1,38 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->3 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->4 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->5 -VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->6 
-MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->7 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->8 -MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->9 
-MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->10 -MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->11 -MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKR
RAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->12 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->13 -LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->14 -MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->15 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->16 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->17 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->18 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G10/run.json b/tests/test_data/outputs/search/G10/run.json index 3ea5601..78116a8 100755 --- a/tests/test_data/outputs/search/G10/run.json +++ b/tests/test_data/outputs/search/G10/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 
11:12:27", + "analysis_start_time": "19/06/2024 10:43:56", "parameters": { - "query": "locidex/extract/G10/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G10", + "command": "search", + "query": "test_dev/extract/G10/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G10", + "db": "test_set/db", + "db_group": null, "name": "G10", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G10/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:29" + "result_file": "test_dev/search/G10/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:57" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G10/seq_store.json b/tests/test_data/outputs/search/G10/seq_store.json index 0834466..66372c7 100755 --- a/tests/test_data/outputs/search/G10/seq_store.json +++ b/tests/test_data/outputs/search/G10/seq_store.json @@ -460,248 +460,286 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": 
"locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": "atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_12:11:0:2", "locus_name": "locus_12:11:0:2", "seq_id": "locus_12:11:0:2", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_13:12:0:3", "locus_name": "locus_13:12:0:3", "seq_id": "locus_13:12:0:3", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_14:13:0:4", "locus_name": "locus_14:13:0:4", "seq_id": "locus_14:13:0:4", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "5": { "parent_id": "locus_15:14:0:5", "locus_name": "locus_15:14:0:5", "seq_id": "locus_15:14:0:5", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "6": { "parent_id": "locus_16:15:0:6", "locus_name": "locus_16:15:0:6", "seq_id": "locus_16:15:0:6", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_17:16:0:7", "locus_name": "locus_17:16:0:7", "seq_id": "locus_17:16:0:7", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_18:17:0:8", "locus_name": "locus_18:17:0:8", "seq_id": "locus_18:17:0:8", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_19:18:0:9", "locus_name": "locus_19:18:0:9", "seq_id": "locus_19:18:0:9", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_2:1:0:10", "locus_name": "locus_2:1:0:10", "seq_id": "locus_2:1:0:10", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_20:19:0:11", "locus_name": "locus_20:19:0:11", "seq_id": "locus_20:19:0:11", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_3:2:0:12", "locus_name": "locus_3:2:0:12", "seq_id": "locus_3:2:0:12", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_4:3:0:13", "locus_name": "locus_4:3:0:13", "seq_id": "locus_4:3:0:13", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_5:4:0:14", "locus_name": "locus_5:4:0:14", "seq_id": "locus_5:4:0:14", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_6:5:0:15", "locus_name": "locus_6:5:0:15", "seq_id": "locus_6:5:0:15", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_7:6:0:16", "locus_name": "locus_7:6:0:16", "seq_id": "locus_7:6:0:16", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_8:7:0:17", "locus_name": "locus_8:7:0:17", "seq_id": "locus_8:7:0:17", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_9:8:0:18", "locus_name": "locus_9:8:0:18", "seq_id": "locus_9:8:0:18", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt deleted file mode 100755 index ee96839..0000000 --- a/tests/test_data/outputs/search/G11/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 -16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 
-18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta deleted file mode 100755 index 1e6d0ea..0000000 --- a/tests/test_data/outputs/search/G11/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->5 -atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->6 -ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->7 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->8 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->9 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->10 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->11 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->12 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->13 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->14 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->15 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->16 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->17 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->18 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->19 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->20 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G11/blast/protein/hsps.txt b/tests/test_data/outputs/search/G11/blast/protein/hsps.txt deleted file mode 100755 index f9da50e..0000000 --- a/tests/test_data/outputs/search/G11/blast/protein/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -6 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -8 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -11 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -12 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -15 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -16 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -18 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G11/blast/protein/queries.fasta b/tests/test_data/outputs/search/G11/blast/protein/queries.fasta deleted file mode 100755 index 7d902a9..0000000 --- a/tests/test_data/outputs/search/G11/blast/protein/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->5 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->6 
-LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->7 -VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->8 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->9 
-MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->10 -MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->11 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->12 
-MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->13 -MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->14 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->15 
-LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->16 -MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->17 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->18 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->19 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->20 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G11/run.json b/tests/test_data/outputs/search/G11/run.json index fe00e54..80c2c53 100755 --- a/tests/test_data/outputs/search/G11/run.json +++ b/tests/test_data/outputs/search/G11/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:32", + "analysis_start_time": "19/06/2024 10:43:59", "parameters": { - "query": "locidex/extract/G11/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G11", + "command": "search", + "query": "test_dev/extract/G11/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G11", + "db": "test_set/db", + "db_group": null, "name": "G11", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 
11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G11/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:34" + "result_file": "test_dev/search/G11/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:59" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G11/seq_store.json b/tests/test_data/outputs/search/G11/seq_store.json index 0b6bcd9..102184d 100755 --- a/tests/test_data/outputs/search/G11/seq_store.json +++ b/tests/test_data/outputs/search/G11/seq_store.json @@ -460,274 +460,316 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_12:11:0:4", "locus_name": "locus_12:11:0:4", "seq_id": "locus_12:11:0:4", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_13:12:0:5", "locus_name": "locus_13:12:0:5", "seq_id": "locus_13:12:0:5", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "6": { "parent_id": "locus_14:13:0:6", "locus_name": "locus_14:13:0:6", "seq_id": "locus_14:13:0:6", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "7": { "parent_id": "locus_15:14:0:7", "locus_name": "locus_15:14:0:7", "seq_id": "locus_15:14:0:7", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_16:15:0:8", "locus_name": "locus_16:15:0:8", "seq_id": "locus_16:15:0:8", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_17:16:0:9", "locus_name": "locus_17:16:0:9", "seq_id": "locus_17:16:0:9", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_18:17:0:10", "locus_name": "locus_18:17:0:10", "seq_id": "locus_18:17:0:10", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggt
ctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_19:18:0:11", "locus_name": "locus_19:18:0:11", "seq_id": "locus_19:18:0:11", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_2:1:0:12", "locus_name": "locus_2:1:0:12", "seq_id": "locus_2:1:0:12", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_20:19:0:13", "locus_name": "locus_20:19:0:13", "seq_id": "locus_20:19:0:13", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_3:2:0:14", "locus_name": "locus_3:2:0:14", "seq_id": "locus_3:2:0:14", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_4:3:0:15", "locus_name": "locus_4:3:0:15", "seq_id": "locus_4:3:0:15", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_5:4:0:16", "locus_name": "locus_5:4:0:16", "seq_id": "locus_5:4:0:16", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_6:5:0:17", "locus_name": "locus_6:5:0:17", "seq_id": "locus_6:5:0:17", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_7:6:0:18", "locus_name": "locus_7:6:0:18", "seq_id": "locus_7:6:0:18", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_8:7:0:19", "locus_name": "locus_8:7:0:19", "seq_id": "locus_8:7:0:19", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "20": { "parent_id": "locus_9:8:0:20", "locus_name": "locus_9:8:0:20", "seq_id": "locus_9:8:0:20", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt deleted file mode 100755 index 9ebac93..0000000 --- a/tests/test_data/outputs/search/G12/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -6 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -8 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -11 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -12 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -15 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -16 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 -17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 
-18 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta deleted file mode 100755 index 80602ca..0000000 --- a/tests/test_data/outputs/search/G12/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->5 -atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->6 -ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->7 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->8 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->9 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->10 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->11 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->12 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->13 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->14 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->15 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->16 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->17 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->18 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->19 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->20 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G12/blast/protein/hsps.txt b/tests/test_data/outputs/search/G12/blast/protein/hsps.txt deleted file mode 100755 index 553fd81..0000000 --- a/tests/test_data/outputs/search/G12/blast/protein/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -6 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -8 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -11 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -12 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -15 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -16 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -18 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G12/blast/protein/queries.fasta b/tests/test_data/outputs/search/G12/blast/protein/queries.fasta deleted file mode 100755 index a75c35c..0000000 --- a/tests/test_data/outputs/search/G12/blast/protein/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->5 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->6 
-LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->7 -VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->8 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->9 
-MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->10 -MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->11 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->12 
-MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->13 -MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->14 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->15 
-LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->16 -MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->17 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->18 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->19 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->20 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G12/run.json b/tests/test_data/outputs/search/G12/run.json index af189eb..ca6fb91 100755 --- a/tests/test_data/outputs/search/G12/run.json +++ b/tests/test_data/outputs/search/G12/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:38", + "analysis_start_time": "19/06/2024 10:44:02", "parameters": { - "query": "locidex/extract/G12/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G12", + "command": "search", + "query": "test_dev/extract/G12/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G12", + "db": "test_set/db", + "db_group": null, "name": "G12", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 
11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G12/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:40" + "result_file": "test_dev/search/G12/seq_store.json", + "analysis_end_time": "19/06/2024 10:44:02" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G12/seq_store.json b/tests/test_data/outputs/search/G12/seq_store.json index 1fd6a02..29c305c 100755 --- a/tests/test_data/outputs/search/G12/seq_store.json +++ b/tests/test_data/outputs/search/G12/seq_store.json @@ -460,274 +460,316 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "5b128d659955716833ce42f2bb060212", "dna_len": 858, + "aa_seq": "VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "d6a46f107d0604f27820147b523948c8", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_12:11:0:4", "locus_name": "locus_12:11:0:4", "seq_id": "locus_12:11:0:4", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_13:12:0:5", "locus_name": "locus_13:12:0:5", "seq_id": "locus_13:12:0:5", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "6": { "parent_id": "locus_14:13:0:6", "locus_name": "locus_14:13:0:6", "seq_id": "locus_14:13:0:6", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "7": { "parent_id": "locus_15:14:0:7", "locus_name": "locus_15:14:0:7", "seq_id": "locus_15:14:0:7", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_16:15:0:8", "locus_name": "locus_16:15:0:8", "seq_id": "locus_16:15:0:8", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_17:16:0:9", "locus_name": "locus_17:16:0:9", "seq_id": "locus_17:16:0:9", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_18:17:0:10", "locus_name": "locus_18:17:0:10", "seq_id": "locus_18:17:0:10", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggt
ctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_19:18:0:11", "locus_name": "locus_19:18:0:11", "seq_id": "locus_19:18:0:11", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_2:1:0:12", "locus_name": "locus_2:1:0:12", "seq_id": "locus_2:1:0:12", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_20:19:0:13", "locus_name": "locus_20:19:0:13", "seq_id": "locus_20:19:0:13", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_3:2:0:14", "locus_name": "locus_3:2:0:14", "seq_id": "locus_3:2:0:14", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_4:3:0:15", "locus_name": "locus_4:3:0:15", "seq_id": "locus_4:3:0:15", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_5:4:0:16", "locus_name": "locus_5:4:0:16", "seq_id": "locus_5:4:0:16", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_6:5:0:17", "locus_name": "locus_6:5:0:17", "seq_id": "locus_6:5:0:17", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_7:6:0:18", "locus_name": "locus_7:6:0:18", "seq_id": "locus_7:6:0:18", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_8:7:0:19", "locus_name": "locus_8:7:0:19", "seq_id": "locus_8:7:0:19", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "20": { "parent_id": "locus_9:8:0:20", "locus_name": "locus_9:8:0:20", "seq_id": "locus_9:8:0:20", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], @@ -1706,12 +1748,12 @@ }, "locus_12": { "nucleotide": [ - "3", - "4" + "4", + "3" ], "protein": [ - "3", - "4" + "4", + "3" ] }, "locus_13": { diff --git a/tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt deleted file mode 100755 index a515bcf..0000000 --- a/tests/test_data/outputs/search/G13/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -15 3 417 417 1 417 1 417 417 11 
97.362 100 100 plus 0.0 710 -16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta deleted file mode 100755 index 00e5ec3..0000000 --- a/tests/test_data/outputs/search/G13/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->6 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->7 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->8 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->9 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->10 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->11 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->12 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->13 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->14 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->15 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->16 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->17 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->18 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->19 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->20 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G13/blast/protein/hsps.txt b/tests/test_data/outputs/search/G13/blast/protein/hsps.txt deleted file mode 100755 index b0cf610..0000000 --- a/tests/test_data/outputs/search/G13/blast/protein/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -6 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -8 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -11 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -12 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -15 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -16 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -18 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G13/blast/protein/queries.fasta b/tests/test_data/outputs/search/G13/blast/protein/queries.fasta deleted file mode 100755 index 7cb4274..0000000 --- a/tests/test_data/outputs/search/G13/blast/protein/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->6 
-LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->7 -VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->8 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->9 
-MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->10 -MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->11 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->12 
-MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->13 -MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->14 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->15 
-LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->16 -MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->17 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->18 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->19 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->20 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G13/run.json b/tests/test_data/outputs/search/G13/run.json index 9e512b7..f9f87b8 100755 --- a/tests/test_data/outputs/search/G13/run.json +++ b/tests/test_data/outputs/search/G13/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:44", + "analysis_start_time": "19/06/2024 10:44:04", "parameters": { - "query": "locidex/extract/G13/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G13", + "command": "search", + "query": "test_dev/extract/G13/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G13", + "db": "test_set/db", + "db_group": null, "name": "G13", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 
11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G13/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:46" + "result_file": "test_dev/search/G13/seq_store.json", + "analysis_end_time": "19/06/2024 10:44:05" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G13/seq_store.json b/tests/test_data/outputs/search/G13/seq_store.json index 7e156b9..756a60a 100755 --- a/tests/test_data/outputs/search/G13/seq_store.json +++ b/tests/test_data/outputs/search/G13/seq_store.json @@ -460,274 +460,316 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_13:12:0:5", "locus_name": "locus_13:12:0:5", "seq_id": "locus_13:12:0:5", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "6": { "parent_id": "locus_14:13:0:6", "locus_name": "locus_14:13:0:6", "seq_id": "locus_14:13:0:6", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "7": { "parent_id": "locus_15:14:0:7", "locus_name": "locus_15:14:0:7", "seq_id": "locus_15:14:0:7", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_16:15:0:8", "locus_name": "locus_16:15:0:8", "seq_id": "locus_16:15:0:8", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_17:16:0:9", "locus_name": "locus_17:16:0:9", "seq_id": "locus_17:16:0:9", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_18:17:0:10", "locus_name": "locus_18:17:0:10", "seq_id": "locus_18:17:0:10", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggt
ctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_19:18:0:11", "locus_name": "locus_19:18:0:11", "seq_id": "locus_19:18:0:11", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_2:1:0:12", "locus_name": "locus_2:1:0:12", "seq_id": "locus_2:1:0:12", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_20:19:0:13", "locus_name": "locus_20:19:0:13", "seq_id": "locus_20:19:0:13", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_3:2:0:14", "locus_name": "locus_3:2:0:14", "seq_id": "locus_3:2:0:14", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_4:3:0:15", "locus_name": "locus_4:3:0:15", "seq_id": "locus_4:3:0:15", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_5:4:0:16", "locus_name": "locus_5:4:0:16", "seq_id": "locus_5:4:0:16", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_6:5:0:17", "locus_name": "locus_6:5:0:17", "seq_id": "locus_6:5:0:17", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_7:6:0:18", "locus_name": "locus_7:6:0:18", "seq_id": "locus_7:6:0:18", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_8:7:0:19", "locus_name": "locus_8:7:0:19", "seq_id": "locus_8:7:0:19", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "20": { "parent_id": "locus_9:8:0:20", "locus_name": "locus_9:8:0:20", "seq_id": "locus_9:8:0:20", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt deleted file mode 100755 index 3324a8b..0000000 --- a/tests/test_data/outputs/search/G14/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -6 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -8 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -11 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -12 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -15 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -16 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 -17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 
1003 -18 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta deleted file mode 100755 index 2da4d02..0000000 --- a/tests/test_data/outputs/search/G14/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->6 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->7 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->8 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->9 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->10 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->11 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->12 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->13 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->14 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->15 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->16 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->17 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->18 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->19 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->20 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G14/blast/protein/hsps.txt b/tests/test_data/outputs/search/G14/blast/protein/hsps.txt deleted file mode 100755 index 6423de4..0000000 --- a/tests/test_data/outputs/search/G14/blast/protein/hsps.txt +++ /dev/null @@ -1,21 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -6 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -8 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -11 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -12 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -15 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -16 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -18 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G14/blast/protein/queries.fasta b/tests/test_data/outputs/search/G14/blast/protein/queries.fasta deleted file mode 100755 index 285e5c8..0000000 --- a/tests/test_data/outputs/search/G14/blast/protein/queries.fasta +++ /dev/null @@ -1,42 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->6 
-LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->7 -VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->8 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->9 
-MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->10 -MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->11 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->12 
-MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->13 -MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->14 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->15 
-LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->16 -MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->17 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->18 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->19 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->20 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G14/run.json b/tests/test_data/outputs/search/G14/run.json index 179734a..9a8a399 100755 --- a/tests/test_data/outputs/search/G14/run.json +++ b/tests/test_data/outputs/search/G14/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:49", + "analysis_start_time": "19/06/2024 10:44:07", "parameters": { - "query": "locidex/extract/G14/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G14", + "command": "search", + "query": "test_dev/extract/G14/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G14", + "db": "test_set/db", + "db_group": null, "name": "G14", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 
11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G14/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:51" + "result_file": "test_dev/search/G14/seq_store.json", + "analysis_end_time": "19/06/2024 10:44:08" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G14/seq_store.json b/tests/test_data/outputs/search/G14/seq_store.json index 9ee016f..7d48736 100755 --- a/tests/test_data/outputs/search/G14/seq_store.json +++ b/tests/test_data/outputs/search/G14/seq_store.json @@ -460,274 +460,316 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "5b128d659955716833ce42f2bb060212", "dna_len": 858, + "aa_seq": "VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "d6a46f107d0604f27820147b523948c8", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_13:12:0:5", "locus_name": "locus_13:12:0:5", "seq_id": "locus_13:12:0:5", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "6": { "parent_id": "locus_14:13:0:6", "locus_name": "locus_14:13:0:6", "seq_id": "locus_14:13:0:6", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "7": { "parent_id": "locus_15:14:0:7", "locus_name": "locus_15:14:0:7", "seq_id": "locus_15:14:0:7", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_16:15:0:8", "locus_name": "locus_16:15:0:8", "seq_id": "locus_16:15:0:8", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_17:16:0:9", "locus_name": "locus_17:16:0:9", "seq_id": "locus_17:16:0:9", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_18:17:0:10", "locus_name": "locus_18:17:0:10", "seq_id": "locus_18:17:0:10", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggt
ctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_19:18:0:11", "locus_name": "locus_19:18:0:11", "seq_id": "locus_19:18:0:11", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_2:1:0:12", "locus_name": "locus_2:1:0:12", "seq_id": "locus_2:1:0:12", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_20:19:0:13", "locus_name": "locus_20:19:0:13", "seq_id": "locus_20:19:0:13", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_3:2:0:14", "locus_name": "locus_3:2:0:14", "seq_id": "locus_3:2:0:14", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_4:3:0:15", "locus_name": "locus_4:3:0:15", "seq_id": "locus_4:3:0:15", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_5:4:0:16", "locus_name": "locus_5:4:0:16", "seq_id": "locus_5:4:0:16", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_6:5:0:17", "locus_name": "locus_6:5:0:17", "seq_id": "locus_6:5:0:17", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_7:6:0:18", "locus_name": "locus_7:6:0:18", "seq_id": "locus_7:6:0:18", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_8:7:0:19", "locus_name": "locus_8:7:0:19", "seq_id": "locus_8:7:0:19", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "20": { "parent_id": "locus_9:8:0:20", "locus_name": "locus_9:8:0:20", "seq_id": "locus_9:8:0:20", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], @@ -1714,12 +1756,12 @@ }, "locus_13": { "nucleotide": [ - "4", - "5" + "5", + "4" ], "protein": [ - "4", - "5" + "5", + "4" ] }, "locus_14": { diff --git a/tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt deleted file mode 100755 index 00093e5..0000000 --- a/tests/test_data/outputs/search/G2/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -15 4 444 444 1 444 1 444 444 0 100.000 100 100 
plus 0.0 821 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta deleted file mode 100755 index 9cad9a4..0000000 --- a/tests/test_data/outputs/search/G2/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->15 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G2/blast/protein/hsps.txt b/tests/test_data/outputs/search/G2/blast/protein/hsps.txt deleted file mode 100755 index 73033ac..0000000 --- a/tests/test_data/outputs/search/G2/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G2/blast/protein/queries.fasta b/tests/test_data/outputs/search/G2/blast/protein/queries.fasta deleted file mode 100755 index 34499a1..0000000 --- a/tests/test_data/outputs/search/G2/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->15 
-MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G2/run.json b/tests/test_data/outputs/search/G2/run.json index 849db03..fe81841 100755 --- a/tests/test_data/outputs/search/G2/run.json +++ b/tests/test_data/outputs/search/G2/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:11:38", + "analysis_start_time": "19/06/2024 10:43:36", "parameters": { - "query": "locidex/extract/G2/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G2", + "command": "search", + "query": "test_dev/extract/G2/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G2", + "db": "test_set/db", + "db_group": null, "name": "G2", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G2/seq_store.json", - "analysis_end_time": "10/06/2024 11:11:41" + "result_file": "test_dev/search/G2/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:36" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G2/seq_store.json b/tests/test_data/outputs/search/G2/seq_store.json index d2f120d..c067f17 100755 --- a/tests/test_data/outputs/search/G2/seq_store.json +++ b/tests/test_data/outputs/search/G2/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "5b128d659955716833ce42f2bb060212", "dna_len": 858, + "aa_seq": "VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "d6a46f107d0604f27820147b523948c8", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt deleted file mode 100755 index cdab2ba..0000000 --- a/tests/test_data/outputs/search/G3/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 -15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -18 7 
642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta deleted file mode 100755 index ce3c3a9..0000000 --- a/tests/test_data/outputs/search/G3/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->15 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G3/blast/protein/hsps.txt b/tests/test_data/outputs/search/G3/blast/protein/hsps.txt deleted file mode 100755 index 233979d..0000000 --- a/tests/test_data/outputs/search/G3/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G3/blast/protein/queries.fasta b/tests/test_data/outputs/search/G3/blast/protein/queries.fasta deleted file mode 100755 index 3ac162b..0000000 --- a/tests/test_data/outputs/search/G3/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->15 
-MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G3/run.json b/tests/test_data/outputs/search/G3/run.json index a346eb7..46a6195 100755 --- a/tests/test_data/outputs/search/G3/run.json +++ b/tests/test_data/outputs/search/G3/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:11:45", + "analysis_start_time": "19/06/2024 10:43:38", "parameters": { - "query": "locidex/extract/G3/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G3", + "command": "search", + "query": "test_dev/extract/G3/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G3", + "db": "test_set/db", + "db_group": null, "name": "G3", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G3/seq_store.json", - "analysis_end_time": "10/06/2024 11:11:47" + "result_file": "test_dev/search/G3/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:39" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G3/seq_store.json b/tests/test_data/outputs/search/G3/seq_store.json index 67c8d69..19d9455 100755 --- a/tests/test_data/outputs/search/G3/seq_store.json +++ b/tests/test_data/outputs/search/G3/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt deleted file mode 100755 index 00093e5..0000000 --- a/tests/test_data/outputs/search/G4/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -18 7 
642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta deleted file mode 100755 index 9cad9a4..0000000 --- a/tests/test_data/outputs/search/G4/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->15 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G4/blast/protein/hsps.txt b/tests/test_data/outputs/search/G4/blast/protein/hsps.txt deleted file mode 100755 index 73033ac..0000000 --- a/tests/test_data/outputs/search/G4/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G4/blast/protein/queries.fasta b/tests/test_data/outputs/search/G4/blast/protein/queries.fasta deleted file mode 100755 index 34499a1..0000000 --- a/tests/test_data/outputs/search/G4/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->15 
-MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G4/run.json b/tests/test_data/outputs/search/G4/run.json index 3c42732..683c034 100755 --- a/tests/test_data/outputs/search/G4/run.json +++ b/tests/test_data/outputs/search/G4/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:11:51", + "analysis_start_time": "19/06/2024 10:43:41", "parameters": { - "query": "locidex/extract/G4/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G4", + "command": "search", + "query": "test_dev/extract/G4/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G4", + "db": "test_set/db", + "db_group": null, "name": "G4", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G4/seq_store.json", - "analysis_end_time": "10/06/2024 11:11:53" + "result_file": "test_dev/search/G4/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:41" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G4/seq_store.json b/tests/test_data/outputs/search/G4/seq_store.json index 3238da5..9004a47 100755 --- a/tests/test_data/outputs/search/G4/seq_store.json +++ b/tests/test_data/outputs/search/G4/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "5b128d659955716833ce42f2bb060212", "dna_len": 858, + "aa_seq": "VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "d6a46f107d0604f27820147b523948c8", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt deleted file mode 100755 index d8538d1..0000000 --- a/tests/test_data/outputs/search/G5/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 103 1 97.087 100 100 plus 1.81e-46 172 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 -15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -18 7 
642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta deleted file mode 100755 index 306e3c9..0000000 --- a/tests/test_data/outputs/search/G5/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->15 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G5/blast/protein/hsps.txt b/tests/test_data/outputs/search/G5/blast/protein/hsps.txt deleted file mode 100755 index 823a08e..0000000 --- a/tests/test_data/outputs/search/G5/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 2 94.118 100 100 N/A 2.04e-17 57.8 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G5/blast/protein/queries.fasta b/tests/test_data/outputs/search/G5/blast/protein/queries.fasta deleted file mode 100755 index 46a4b87..0000000 --- a/tests/test_data/outputs/search/G5/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MY*TPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->15 
-MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G5/run.json b/tests/test_data/outputs/search/G5/run.json index a639e7d..f74570b 100755 --- a/tests/test_data/outputs/search/G5/run.json +++ b/tests/test_data/outputs/search/G5/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:11:57", + "analysis_start_time": "19/06/2024 10:43:43", "parameters": { - "query": "locidex/extract/G5/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G5", + "command": "search", + "query": "test_dev/extract/G5/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G5", + "db": "test_set/db", + "db_group": null, "name": "G5", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G5/seq_store.json", - "analysis_end_time": "10/06/2024 11:11:59" + "result_file": "test_dev/search/G5/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:44" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G5/seq_store.json b/tests/test_data/outputs/search/G5/seq_store.json index fbd2b3d..e2a8c51 100755 --- a/tests/test_data/outputs/search/G5/seq_store.json +++ b/tests/test_data/outputs/search/G5/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "e9e707ebc64e10a881f1323ebff85369", "dna_len": 102, + "aa_seq": "MY*TPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "daa2576d97f92c0fe9161c023757d495", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 1, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 1 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt deleted file mode 100755 index 8e411cd..0000000 --- a/tests/test_data/outputs/search/G6/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 2 98.039 100 100 plus 3.90e-48 178 -1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 -2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -18 7 
642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta deleted file mode 100755 index 26e0ed8..0000000 --- a/tests/test_data/outputs/search/G6/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->15 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G6/blast/protein/hsps.txt b/tests/test_data/outputs/search/G6/blast/protein/hsps.txt deleted file mode 100755 index f47c1c9..0000000 --- a/tests/test_data/outputs/search/G6/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 1 97.059 100 100 N/A 1.28e-18 60.8 -1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 -2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G6/blast/protein/queries.fasta b/tests/test_data/outputs/search/G6/blast/protein/queries.fasta deleted file mode 100755 index 9f022f4..0000000 --- a/tests/test_data/outputs/search/G6/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITAS*FAIFIIIVVSVLLLEGD* ->1 
-MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->15 
-MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G6/run.json b/tests/test_data/outputs/search/G6/run.json index 05fbed0..f83a795 100755 --- a/tests/test_data/outputs/search/G6/run.json +++ b/tests/test_data/outputs/search/G6/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:03", + "analysis_start_time": "19/06/2024 10:43:46", "parameters": { - "query": "locidex/extract/G6/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G6", + "command": "search", + "query": "test_dev/extract/G6/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G6", + "db": "test_set/db", + "db_group": null, "name": "G6", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G6/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:06" + "result_file": "test_dev/search/G6/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:47" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G6/seq_store.json b/tests/test_data/outputs/search/G6/seq_store.json index 29546b1..c5ec107 100755 --- a/tests/test_data/outputs/search/G6/seq_store.json +++ b/tests/test_data/outputs/search/G6/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "a47cc24760462371e919143c5cc81376", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITAS*FAIFIIIVVSVLLLEGD*", "aa_hash": "d65fe5c591a0b644f991adbc1b300a75", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 1, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 1 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fe04d17ec353c08b903c85fc0ca4dc02", "dna_len": 762, + "aa_seq": "MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "988bf512f0362e276b0e5622fbaa7079", "aa_len": 254, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "5b128d659955716833ce42f2bb060212", "dna_len": 858, + "aa_seq": "VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "d6a46f107d0604f27820147b523948c8", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt deleted file mode 100755 index 61d17b6..0000000 --- a/tests/test_data/outputs/search/G7/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 3 762 3 762 760 0 100.000 99 99 plus 0.0 1404 -2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 -15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -18 7 
642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta deleted file mode 100755 index b762e62..0000000 --- a/tests/test_data/outputs/search/G7/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->15 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G7/blast/protein/hsps.txt b/tests/test_data/outputs/search/G7/blast/protein/hsps.txt deleted file mode 100755 index 194fd46..0000000 --- a/tests/test_data/outputs/search/G7/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 2 254 2 254 253 0 100.000 99 99 N/A 0.0 512 -2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G7/blast/protein/queries.fasta b/tests/test_data/outputs/search/G7/blast/protein/queries.fasta deleted file mode 100755 index cad8adb..0000000 --- a/tests/test_data/outputs/search/G7/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-GRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->15 
-MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G7/run.json b/tests/test_data/outputs/search/G7/run.json index a402ede..9af2509 100755 --- a/tests/test_data/outputs/search/G7/run.json +++ b/tests/test_data/outputs/search/G7/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:09", + "analysis_start_time": "19/06/2024 10:43:48", "parameters": { - "query": "locidex/extract/G7/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G7", + "command": "search", + "query": "test_dev/extract/G7/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G7", + "db": "test_set/db", + "db_group": null, "name": "G7", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G7/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:11" + "result_file": "test_dev/search/G7/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:49" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G7/seq_store.json b/tests/test_data/outputs/search/G7/seq_store.json index 8416754..f833642 100755 --- a/tests/test_data/outputs/search/G7/seq_store.json +++ b/tests/test_data/outputs/search/G7/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "796419469778f7ec3851c813f59cfff7", "dna_len": 762, + "aa_seq": "GRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "7cf5ac5873242fd0899f2ce5f93d01c3", "aa_len": 254, "start_codon": "ggg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt deleted file mode 100755 index ea86f3a..0000000 --- a/tests/test_data/outputs/search/G8/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 9 762 762 4 762 4 762 759 0 100.000 99 99 plus 0.0 1402 -2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 -3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 -6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 -8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 -11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 -12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 -15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 -16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 -18 7 
642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta deleted file mode 100755 index 232dea5..0000000 --- a/tests/test_data/outputs/search/G8/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa ->2 
-gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->3 -atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->4 
-atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->5 
-ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->6 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->7 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa ->8 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->9 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->10 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->11 -atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa ->12 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->13 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->14 
-ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa ->15 -atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->16 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->17 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag ->18 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->19 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G8/blast/protein/hsps.txt b/tests/test_data/outputs/search/G8/blast/protein/hsps.txt deleted file mode 100755 index 4b86eab..0000000 --- a/tests/test_data/outputs/search/G8/blast/protein/hsps.txt +++ /dev/null @@ -1,20 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 9 254 254 2 254 2 254 253 0 100.000 99 99 N/A 0.0 512 -2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 -3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 -6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 -8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 -11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 -12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 -15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 -16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 -18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G8/blast/protein/queries.fasta b/tests/test_data/outputs/search/G8/blast/protein/queries.fasta deleted file mode 100755 index cb18c87..0000000 --- a/tests/test_data/outputs/search/G8/blast/protein/queries.fasta +++ /dev/null @@ -1,40 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-KRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* ->2 -VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->3 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->4 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->5 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* ->6 
-VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->7 -MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* ->8 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->9 
-MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->10 -MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* ->11 -MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* ->12 
-MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->13 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->14 -LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* ->15 
-MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->16 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->17 -VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* ->18 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->19 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G8/run.json b/tests/test_data/outputs/search/G8/run.json index 4c0f386..ac8fc6f 100755 --- a/tests/test_data/outputs/search/G8/run.json +++ b/tests/test_data/outputs/search/G8/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 11:12:15", + "analysis_start_time": "19/06/2024 10:43:51", "parameters": { - "query": "locidex/extract/G8/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G8", + "command": "search", + "query": "test_dev/extract/G8/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G8", + "db": "test_set/db", + "db_group": null, "name": "G8", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella 
GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G8/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:17" + "result_file": "test_dev/search/G8/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:52" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G8/seq_store.json b/tests/test_data/outputs/search/G8/seq_store.json index 9476613..ff8f23f 100755 --- a/tests/test_data/outputs/search/G8/seq_store.json +++ b/tests/test_data/outputs/search/G8/seq_store.json @@ -460,261 +460,301 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_10:9:0:1", "locus_name": "locus_10:9:0:1", "seq_id": "locus_10:9:0:1", + "dna_seq": 
"aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa", + "dna_ambig_count": 0, "dna_hash": "fd6284b58a891cf02058906c9ee37a00", "dna_len": 762, + "aa_seq": "KRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW*", "aa_hash": "443ddee4a99bfc9bbbab56d103f7d81d", "aa_len": 254, "start_codon": "aaa", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_11:10:0:2", "locus_name": "locus_11:10:0:2", "seq_id": "locus_11:10:0:2", + "dna_seq": 
"gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "5b128d659955716833ce42f2bb060212", "dna_len": 858, + "aa_seq": "VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "d6a46f107d0604f27820147b523948c8", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_12:11:0:3", "locus_name": "locus_12:11:0:3", "seq_id": "locus_12:11:0:3", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_13:12:0:4", "locus_name": "locus_13:12:0:4", "seq_id": "locus_13:12:0:4", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "5": { "parent_id": "locus_14:13:0:5", "locus_name": "locus_14:13:0:5", "seq_id": "locus_14:13:0:5", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "b9060019038526aa6fc38d2f7510edc6", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI*", "aa_hash": "05bc7823b1abc2e6d4e2c08ca5325134", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "6": { "parent_id": "locus_15:14:0:6", "locus_name": "locus_15:14:0:6", "seq_id": "locus_15:14:0:6", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_16:15:0:7", "locus_name": "locus_16:15:0:7", "seq_id": "locus_16:15:0:7", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "16e55766c603fe33c9e75d8e81743ae2", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE*", "aa_hash": "f85b3701f5642454bf4d2263feb13354", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_17:16:0:8", "locus_name": "locus_17:16:0:8", "seq_id": "locus_17:16:0:8", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_18:17:0:9", "locus_name": "locus_18:17:0:9", "seq_id": "locus_18:17:0:9", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_19:18:0:10", "locus_name": "locus_19:18:0:10", "seq_id": "locus_19:18:0:10", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "a012eee23637b48e39b00808a057e35d", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "cb1202450e68e2b4f0d557a645f1a98d", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_2:1:0:11", "locus_name": "locus_2:1:0:11", "seq_id": "locus_2:1:0:11", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "e35184c8ff18e9116fc8faef20532f56", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG*", "aa_hash": "2a1a77c25ad681437705d9145aef608c", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_20:19:0:12", "locus_name": "locus_20:19:0:12", "seq_id": "locus_20:19:0:12", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_3:2:0:13", "locus_name": "locus_3:2:0:13", "seq_id": "locus_3:2:0:13", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_4:3:0:14", "locus_name": "locus_4:3:0:14", "seq_id": "locus_4:3:0:14", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "ac1b21798c0f672ad26f5a91ea278590", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH*", "aa_hash": "dbcec3a0e9ecdc165c4e9162b079f2ee", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_5:4:0:15", "locus_name": "locus_5:4:0:15", "seq_id": "locus_5:4:0:15", + "dna_seq": 
"atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "d00defcca8588f21ce16fa1d0ac13389", "dna_len": 444, + "aa_seq": "MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "82d8baa0a3dad18a0efd8104ee15baae", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_6:5:0:16", "locus_name": "locus_6:5:0:16", "seq_id": "locus_6:5:0:16", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_7:6:0:17", "locus_name": "locus_7:6:0:17", "seq_id": "locus_7:6:0:17", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "dc94bf1ec4ff9bed2a1f460cbd958656", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG*", "aa_hash": "da78b534d889d8f35bec304ef54f1b93", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_8:7:0:18", "locus_name": "locus_8:7:0:18", "seq_id": "locus_8:7:0:18", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "19": { "parent_id": "locus_9:8:0:19", "locus_name": "locus_9:8:0:19", "seq_id": "locus_9:8:0:19", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt b/tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt deleted file mode 100755 index 6654f25..0000000 --- a/tests/test_data/outputs/search/G9/blast/nucleotide/hsps.txt +++ /dev/null @@ -1,19 +0,0 @@ -0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 -1 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 -2 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 -3 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 -4 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 -5 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 -6 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 -7 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 -8 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 -9 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 -10 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 -11 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 -12 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 -13 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 -14 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 -15 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 -16 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 -17 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 -18 8 
684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta b/tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta deleted file mode 100755 index c89bc4d..0000000 --- a/tests/test_data/outputs/search/G9/blast/nucleotide/queries.fasta +++ /dev/null @@ -1,38 +0,0 @@ ->0 -atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa ->1 -gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga ->2 
-atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga ->3 -atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga ->4 -ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag ->5 
-gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga ->6 
-atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa ->7 
-atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa ->8 
-atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag ->9 
-atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa ->10 -atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa ->11 -atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa ->12 -atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa ->13 
-ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa ->14 -atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa ->15 -atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa ->16 
-gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag ->17 -atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga ->18 -atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G9/blast/protein/hsps.txt b/tests/test_data/outputs/search/G9/blast/protein/hsps.txt deleted file mode 100755 index 4a5a697..0000000 --- a/tests/test_data/outputs/search/G9/blast/protein/hsps.txt +++ /dev/null @@ -1,19 +0,0 @@ -0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 -1 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 -2 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 -3 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 -4 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 -5 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 -6 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 -7 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 -8 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 -9 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 -10 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 -11 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 -12 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 -13 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 -14 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 -15 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 -16 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 -17 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 -18 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G9/blast/protein/queries.fasta b/tests/test_data/outputs/search/G9/blast/protein/queries.fasta deleted file mode 100755 index 26b5df4..0000000 --- a/tests/test_data/outputs/search/G9/blast/protein/queries.fasta +++ /dev/null @@ -1,38 +0,0 @@ ->0 -MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* ->1 
-VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* ->2 -MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* ->3 -MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* ->4 -LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* ->5 -VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* ->6 
-MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* ->7 -MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* ->8 -MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* ->9 
-MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* ->10 -MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* ->11 -MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKR
RAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* ->12 -MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* ->13 -LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* ->14 -MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* ->15 -MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* ->16 -VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* ->17 -MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* ->18 -MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G9/run.json b/tests/test_data/outputs/search/G9/run.json index fae40d0..3f9db12 100755 --- a/tests/test_data/outputs/search/G9/run.json +++ b/tests/test_data/outputs/search/G9/run.json @@ -1,11 +1,15 @@ { - "analysis_start_time": "10/06/2024 
11:12:21", + "analysis_start_time": "19/06/2024 10:43:53", "parameters": { - "query": "locidex/extract/G9/raw.extracted.seqs.fasta", - "outdir": "locidex/search/G9", + "command": "search", + "query": "test_dev/extract/G9/raw.extracted.seqs.fasta", + "outdir": "test_dev/search/G9", + "db": "test_set/db", + "db_group": null, "name": "G9", - "db": "locidex/db", "config": null, + "db_name": "Locidex Test Database", + "db_version": "1.0.0", "min_evalue": 0.0001, "min_dna_len": 1, "min_aa_len": 1, @@ -20,8 +24,16 @@ "format": null, "translation_table": 11, "annotate": false, - "force": true + "force": true, + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" }, - "result_file": "locidex/search/G9/seq_store.json", - "analysis_end_time": "10/06/2024 11:12:23" + "result_file": "test_dev/search/G9/seq_store.json", + "analysis_end_time": "19/06/2024 10:43:54" } \ No newline at end of file diff --git a/tests/test_data/outputs/search/G9/seq_store.json b/tests/test_data/outputs/search/G9/seq_store.json index 0b7e531..2fbd8a6 100755 --- a/tests/test_data/outputs/search/G9/seq_store.json +++ b/tests/test_data/outputs/search/G9/seq_store.json @@ -460,248 +460,286 @@ "parent_id": "locus_1:0:0:0", "locus_name": "locus_1:0:0:0", "seq_id": "locus_1:0:0:0", + "dna_seq": "atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa", + "dna_ambig_count": 0, "dna_hash": "d17b02d12afa7f832ee37df6f24a8f55", "dna_len": 102, + "aa_seq": "MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD*", "aa_hash": "a931d1f75114576e60538364eb01a05f", "aa_len": 34, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "1": { "parent_id": "locus_11:10:0:1", "locus_name": "locus_11:10:0:1", 
"seq_id": "locus_11:10:0:1", + "dna_seq": "gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga", + "dna_ambig_count": 0, "dna_hash": "c4266f2f24fdd8e039113c6b0955af9f", "dna_len": 858, + "aa_seq": "VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN*", "aa_hash": "9b9be0e0a2b6f84053716d6c14a0fb9a", "aa_len": 286, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "2": { "parent_id": "locus_12:11:0:2", "locus_name": "locus_12:11:0:2", "seq_id": "locus_12:11:0:2", + "dna_seq": 
"atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga", + "dna_ambig_count": 0, "dna_hash": "eb72da68c159497d5f0c8eeddc51b5ae", "dna_len": 972, + "aa_seq": "MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE*", "aa_hash": "2fb77f6d0f615a030a08af74f3eda277", "aa_len": 324, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "3": { "parent_id": "locus_13:12:0:3", "locus_name": "locus_13:12:0:3", "seq_id": "locus_13:12:0:3", + "dna_seq": 
"atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga", + "dna_ambig_count": 0, "dna_hash": "8f300259dcb46224bdc1fe5273107324", "dna_len": 1098, + "aa_seq": "MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG*", "aa_hash": "fd5140611af6e0d9f95426bf9beaa8b1", "aa_len": 366, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "4": { "parent_id": "locus_14:13:0:4", "locus_name": "locus_14:13:0:4", "seq_id": "locus_14:13:0:4", + "dna_seq": 
"ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag", + "dna_ambig_count": 0, "dna_hash": "2fa0b06ed72e36b4071cab9d0b4f87d0", "dna_len": 1281, + "aa_seq": "LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI*", "aa_hash": "bf5190f310477277da454725d434a8ee", "aa_len": 427, "start_codon": "ttg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + 
"count_internal_stop": 0 }, "5": { "parent_id": "locus_15:14:0:5", "locus_name": "locus_15:14:0:5", "seq_id": "locus_15:14:0:5", + "dna_seq": "gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga", + "dna_ambig_count": 0, "dna_hash": "bc98c2fe196a68a79036814396513a8d", "dna_len": 1434, + "aa_seq": 
"VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK*", "aa_hash": "5d8c3cf06444c3741ebfc57cc92db0d3", "aa_len": 478, "start_codon": "gtg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "6": { "parent_id": "locus_16:15:0:6", "locus_name": "locus_16:15:0:6", "seq_id": "locus_16:15:0:6", + "dna_seq": "atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtt
tgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa", + "dna_ambig_count": 0, "dna_hash": "a9b3cb97dac3cda6e932a49bf9a507bd", "dna_len": 1464, + "aa_seq": "MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE*", "aa_hash": "3ca5f1d7b46eda9460608ef61603c12f", "aa_len": 488, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "7": { "parent_id": "locus_17:16:0:7", "locus_name": "locus_17:16:0:7", "seq_id": "locus_17:16:0:7", + "dna_seq": 
"atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa", + "dna_ambig_count": 0, "dna_hash": "a0d97d985483413f3c18bfe5833ae9ce", "dna_len": 1836, + "aa_seq": 
"MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG*", "aa_hash": "bdd2b967a7a6337b5973dd723a62fa43", "aa_len": 612, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "8": { "parent_id": "locus_18:17:0:8", "locus_name": "locus_18:17:0:8", "seq_id": "locus_18:17:0:8", + "dna_seq": "atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctct
aacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag", + "dna_ambig_count": 0, "dna_hash": "b3021e979faa7600756c06dfadfcf14c", "dna_len": 1914, + "aa_seq": "MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE*", "aa_hash": "42c4a831ee79a27c47138fe96829814b", "aa_len": 638, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "9": { "parent_id": "locus_19:18:0:9", "locus_name": "locus_19:18:0:9", "seq_id": "locus_19:18:0:9", + "dna_seq": 
"atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa", + "dna_ambig_count": 0, "dna_hash": "de32372598811d63bcc1a0eaf6872644", "dna_len": 2037, + "aa_seq": "MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI*", "aa_hash": "a48a4e4dc8c7f61a7be06a7f72142198", "aa_len": 679, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "10": { "parent_id": "locus_2:1:0:10", "locus_name": "locus_2:1:0:10", "seq_id": "locus_2:1:0:10", + "dna_seq": "atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa", + "dna_ambig_count": 0, "dna_hash": "8b70e777f6bbf2c91ff75947824b5976", "dna_len": 285, + "aa_seq": "MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG*", "aa_hash": "6e403f4ed2da629ea2ebfe18278ed120", "aa_len": 95, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "11": { "parent_id": "locus_20:19:0:11", "locus_name": "locus_20:19:0:11", "seq_id": "locus_20:19:0:11", + "dna_seq": 
"atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgttggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcg
actattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgcatactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactg
ccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa", + "dna_ambig_count": 0, "dna_hash": "4461918e985715e4a2b07494e1f91326", "dna_len": 4935, + "aa_seq": 
"MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP*", "aa_hash": "ffe361d9273afbab0ed9009dd5aa4041", "aa_len": 1645, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "12": { "parent_id": "locus_3:2:0:12", "locus_name": "locus_3:2:0:12", "seq_id": "locus_3:2:0:12", + "dna_seq": 
"atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa", + "dna_ambig_count": 0, "dna_hash": "670705cd2a59c4a23a897ac656a888fe", "dna_len": 327, + "aa_seq": "MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD*", "aa_hash": "f46f4f7b2362abaf1440c9752e10670e", "aa_len": 109, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "13": { "parent_id": "locus_4:3:0:13", "locus_name": "locus_4:3:0:13", "seq_id": "locus_4:3:0:13", + "dna_seq": "ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa", + "dna_ambig_count": 0, "dna_hash": "73790840c76943caac0ebb3b2b3f0b98", "dna_len": 417, + "aa_seq": "LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH*", "aa_hash": "77784601d754a5f36152853592023b08", "aa_len": 139, "start_codon": "ctg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "14": { "parent_id": "locus_5:4:0:14", "locus_name": "locus_5:4:0:14", "seq_id": "locus_5:4:0:14", + "dna_seq": 
"atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa", + "dna_ambig_count": 0, "dna_hash": "8cf4341689dd00f74adfcc43d1f4a35e", "dna_len": 444, + "aa_seq": "MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL*", "aa_hash": "736cc3184dda2c5ac596f76753272622", "aa_len": 148, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "15": { "parent_id": "locus_6:5:0:15", "locus_name": "locus_6:5:0:15", "seq_id": "locus_6:5:0:15", + "dna_seq": "atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa", + "dna_ambig_count": 0, "dna_hash": "a11561f2804e2c32c78049f8b9aeb517", "dna_len": 543, + "aa_seq": "MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV*", "aa_hash": "f7a904be6607ca5a6d9388ad9ca3693a", "aa_len": 181, "start_codon": "atg", - "stop_codon": "taa", - "count_internal_stop": 0, - 
"dna_ambig_count": 0 + "end_codon": "taa", + "count_internal_stop": 0 }, "16": { "parent_id": "locus_7:6:0:16", "locus_name": "locus_7:6:0:16", "seq_id": "locus_7:6:0:16", + "dna_seq": "gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag", + "dna_ambig_count": 0, "dna_hash": "49d9878c9d3071aa1d2f26cb947b784c", "dna_len": 606, + "aa_seq": "VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG*", "aa_hash": "a1169e1ef4c2882247a9349da07cb6bd", "aa_len": 202, "start_codon": "gtg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 }, "17": { "parent_id": "locus_8:7:0:17", "locus_name": "locus_8:7:0:17", "seq_id": "locus_8:7:0:17", + "dna_seq": 
"atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga", + "dna_ambig_count": 0, "dna_hash": "7ebe74afecf146ec4db816c8deced64f", "dna_len": 642, + "aa_seq": "MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA*", "aa_hash": "ad5a47cb3a55985083b963b0cf67f382", "aa_len": 214, "start_codon": "atg", - "stop_codon": "tga", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tga", + "count_internal_stop": 0 }, "18": { "parent_id": "locus_9:8:0:18", "locus_name": "locus_9:8:0:18", "seq_id": "locus_9:8:0:18", + "dna_seq": "atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag", + "dna_ambig_count": 0, 
"dna_hash": "41ebb36872854b2b33c8c028e23d8ad1", "dna_len": 684, + "aa_seq": "MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF*", "aa_hash": "02989d1dbb505d15f06e76ddc9174b7e", "aa_len": 228, "start_codon": "atg", - "stop_codon": "tag", - "count_internal_stop": 0, - "dna_ambig_count": 0 + "end_codon": "tag", + "count_internal_stop": 0 } }, "query_hit_columns": [], diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index 7ed7fc8..a16eb47 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -60,7 +60,9 @@ - path: test_out/search/G1/seq_store.json md5sum: 9e33a22323f939371a203c47ebeede0d - path: test_out/report/conservative/G1/report.json + md5sum: e048b379915dc91e93d048cd89c78ae2 - path: test_out/report/normal/G1/report.json + md5sum: b5f39c7aceee1ce8062f32b66ef9d9f3 - name: Test G2 @@ -77,7 +79,9 @@ - path: test_out/search/G2/seq_store.json md5sum: 735c90328b01180800056f63e983c5a4 - path: test_out/report/conservative/G2/report.json + md5sum: b5999c77be20fef1f681a620cb5b071c - path: test_out/report/normal/G2/report.json + md5sum: b0e2c03eeb5ce19fca5ecf5980053dbf - name: Test G3 @@ -94,7 +98,9 @@ - path: test_out/search/G3/seq_store.json md5sum: 9a5b510de5cd2f5e454f7bf9dc10accf - path: test_out/report/conservative/G3/report.json + md5sum: 30bf0e9d8198b05d639b4a3ab04f7cff - path: test_out/report/normal/G3/report.json + md5sum: ac66a45d13640cbe65c8445ac42d73a4 - name: Test G4 @@ -111,7 +117,9 @@ - path: test_out/search/G4/seq_store.json md5sum: 1c150a532cbb42efe4794130b5fb5f5d - path: test_out/report/conservative/G4/report.json + md5sum: c4e56ec3a72f97a164dcdde217795434 - path: test_out/report/normal/G4/report.json + md5sum: 772fe43da898ad3bf4d59ae0463e72f9 - name: Test G5 @@ -128,7 +136,9 @@ - path: test_out/search/G5/seq_store.json md5sum: 
a0bab6959b2c9294292f4499d8c4563c - path: test_out/report/conservative/G5/report.json + md5sum: 09a5009eedc1a898448078a54f8e28a8 - path: test_out/report/normal/G5/report.json + md5sum: 1eafef6baebe88c84ea8b7a82183d6f5 - name: Test G6 @@ -145,7 +155,9 @@ - path: test_out/search/G6/seq_store.json md5sum: def35eb2b6301b8ab6fb31ecaf995d78 - path: test_out/report/conservative/G6/report.json + md5sum: a9bfc3bf4a32a181fa56f037a1265347 - path: test_out/report/normal/G6/report.json + md5sum: 1195e0a8d01aa8bc160061631c8198ac - name: Test G7 @@ -162,7 +174,9 @@ - path: test_out/search/G7/seq_store.json md5sum: 8e8ed91f9baaffc0ab27a2a26df7a647 - path: test_out/report/conservative/G7/report.json + md5sum: 16b7d2a1e6e8888b57bbc11cab682492 - path: test_out/report/normal/G7/report.json + md5sum: b6eab54e2d17a8eb0fc8fb275af54003 - name: Test G8 @@ -179,7 +193,9 @@ - path: test_out/search/G8/seq_store.json md5sum: 95673d95dcd5bd54afc81f8788e5ef97 - path: test_out/report/conservative/G8/report.json + md5sum: 312756f9eaf90483c95252daa92bea65 - path: test_out/report/normal/G8/report.json + md5sum: 3f4bb8c104469f7a07d7f3fe9dd0ed1d - name: Test G9 @@ -196,7 +212,9 @@ - path: test_out/search/G9/seq_store.json md5sum: c502a31310f6584a10b4378c5a1c2d82 - path: test_out/report/conservative/G9/report.json + md5sum: 55ab5181170966ead133c425fded1060 - path: test_out/report/normal/G9/report.json + md5sum: 6db1d3e7ae26152f8d041cecc1ac847b - name: Test G10 @@ -213,7 +231,9 @@ - path: test_out/search/G10/seq_store.json md5sum: cd28ea213bd681c7abba59e6cf68bdb8 - path: test_out/report/conservative/G10/report.json + md5sum: c22f72415ec2eac72c3260edfc099dca - path: test_out/report/normal/G10/report.json + md5sum: e30f1282431bb9923ace156acc89321c - name: Test G11 @@ -230,7 +250,9 @@ - path: test_out/search/G11/seq_store.json md5sum: 439b41382a87960e7d123afc060aaae8 - path: test_out/report/conservative/G11/report.json + md5sum: 2a36a4242d2742d7c0cfac39eefd482c - path: test_out/report/normal/G11/report.json 
+ md5sum: 5f170a44fadf72ab35d5b2de8b2ef343 - name: Test G12 @@ -247,7 +269,9 @@ - path: test_out/search/G12/seq_store. md5sum: ba3cd7d9e5e243b85cf5cb8347fb1c3f - path: test_out/report/conservative/G12/report.json + md5sum: 05cb031b16a5e40299d0f7ccefe7ac8c - path: test_out/report/normal/G12/report.json + md5sum: b4a398254e55838aaeeb3a97a9b51181 - name: Test G13 @@ -264,7 +288,9 @@ - path: test_out/search/G13/seq_store.json md5sum: c5f24a2fcdc2b118485db8373f239ac5 - path: test_out/report/conservative/G13/report.json + md5sum: 6b2d0b51683274983ea117d84a1113ee - path: test_out/report/normal/G13/report.json + md5sum: fc00bd027755743198d97355354b53d1 - name: Test G14 @@ -281,4 +307,6 @@ - path: test_out/search/G14/seq_store.json md5sum: 35aab22a5e554b3db63940b873e267b9 - path: test_out/report/conservative/G14/report.json - - path: test_out/report/normal/G14/report.json \ No newline at end of file + md5sum: 1de0c6f82e484fb6a14a438e256b058b + - path: test_out/report/normal/G14/report.json + md5sum: 212127f73816ffc3450958e23d26aa50 \ No newline at end of file From ea43c5e36db24d6bd6a76fe065f829c1d2aa25ed Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Wed, 19 Jun 2024 11:09:44 -0500 Subject: [PATCH 4/4] updated tests to include dev outputs Outputs of updated reports have been verified agains the original 'profile.json' outputs --- .../extract/G1/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G1/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G10/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G10/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G11/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G11/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G12/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G12/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G13/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G13/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes 
.../extract/G14/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G14/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G2/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G2/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G3/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G3/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G4/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G4/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G5/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G5/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G6/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G6/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G7/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G7/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G8/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G8/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../extract/G9/blast_db/contigs.fasta.nog | Bin 0 -> 36 bytes .../extract/G9/blast_db/contigs.fasta.nos | Bin 0 -> 18 bytes .../report/conservative/G1/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G1/run.json | 21 ++++++++ .../report/conservative/G10/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G10/run.json | 21 ++++++++ .../report/conservative/G11/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G11/run.json | 21 ++++++++ .../report/conservative/G12/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G12/run.json | 21 ++++++++ .../report/conservative/G13/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G13/run.json | 21 ++++++++ .../report/conservative/G14/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G14/run.json | 21 ++++++++ .../report/conservative/G2/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G2/run.json | 21 ++++++++ 
.../report/conservative/G3/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G3/run.json | 21 ++++++++ .../report/conservative/G4/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G4/run.json | 21 ++++++++ .../report/conservative/G5/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G5/run.json | 21 ++++++++ .../report/conservative/G6/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G6/run.json | 21 ++++++++ .../report/conservative/G7/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G7/run.json | 21 ++++++++ .../report/conservative/G8/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G8/run.json | 21 ++++++++ .../report/conservative/G9/report.json | 49 ++++++++++++++++++ .../outputs/report/conservative/G9/run.json | 21 ++++++++ .../outputs/report/normal/G1/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G1/run.json | 21 ++++++++ .../outputs/report/normal/G10/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G10/run.json | 21 ++++++++ .../outputs/report/normal/G11/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G11/run.json | 21 ++++++++ .../outputs/report/normal/G12/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G12/run.json | 21 ++++++++ .../outputs/report/normal/G13/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G13/run.json | 21 ++++++++ .../outputs/report/normal/G14/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G14/run.json | 21 ++++++++ .../outputs/report/normal/G2/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G2/run.json | 21 ++++++++ .../outputs/report/normal/G3/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G3/run.json | 21 ++++++++ .../outputs/report/normal/G4/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G4/run.json | 21 ++++++++ .../outputs/report/normal/G5/report.json | 49 ++++++++++++++++++ 
.../outputs/report/normal/G5/run.json | 21 ++++++++ .../outputs/report/normal/G6/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G6/run.json | 21 ++++++++ .../outputs/report/normal/G7/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G7/run.json | 21 ++++++++ .../outputs/report/normal/G8/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G8/run.json | 21 ++++++++ .../outputs/report/normal/G9/report.json | 49 ++++++++++++++++++ .../outputs/report/normal/G9/run.json | 21 ++++++++ .../outputs/search/G1/nucleotide/hsps.txt | 20 +++++++ .../search/G1/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G1/protein/hsps.txt | 20 +++++++ .../outputs/search/G1/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G10/nucleotide/hsps.txt | 19 +++++++ .../search/G10/nucleotide/queries.fasta | 38 ++++++++++++++ .../outputs/search/G10/protein/hsps.txt | 19 +++++++ .../outputs/search/G10/protein/queries.fasta | 38 ++++++++++++++ .../outputs/search/G11/nucleotide/hsps.txt | 21 ++++++++ .../search/G11/nucleotide/queries.fasta | 42 +++++++++++++++ .../outputs/search/G11/protein/hsps.txt | 21 ++++++++ .../outputs/search/G11/protein/queries.fasta | 42 +++++++++++++++ .../outputs/search/G12/nucleotide/hsps.txt | 21 ++++++++ .../search/G12/nucleotide/queries.fasta | 42 +++++++++++++++ .../outputs/search/G12/protein/hsps.txt | 21 ++++++++ .../outputs/search/G12/protein/queries.fasta | 42 +++++++++++++++ .../outputs/search/G13/nucleotide/hsps.txt | 21 ++++++++ .../search/G13/nucleotide/queries.fasta | 42 +++++++++++++++ .../outputs/search/G13/protein/hsps.txt | 21 ++++++++ .../outputs/search/G13/protein/queries.fasta | 42 +++++++++++++++ .../outputs/search/G14/nucleotide/hsps.txt | 21 ++++++++ .../search/G14/nucleotide/queries.fasta | 42 +++++++++++++++ .../outputs/search/G14/protein/hsps.txt | 21 ++++++++ .../outputs/search/G14/protein/queries.fasta | 42 +++++++++++++++ .../outputs/search/G2/nucleotide/hsps.txt | 20 +++++++ 
.../search/G2/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G2/protein/hsps.txt | 20 +++++++ .../outputs/search/G2/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G3/nucleotide/hsps.txt | 20 +++++++ .../search/G3/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G3/protein/hsps.txt | 20 +++++++ .../outputs/search/G3/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G4/nucleotide/hsps.txt | 20 +++++++ .../search/G4/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G4/protein/hsps.txt | 20 +++++++ .../outputs/search/G4/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G5/nucleotide/hsps.txt | 20 +++++++ .../search/G5/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G5/protein/hsps.txt | 20 +++++++ .../outputs/search/G5/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G6/nucleotide/hsps.txt | 20 +++++++ .../search/G6/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G6/protein/hsps.txt | 20 +++++++ .../outputs/search/G6/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G7/nucleotide/hsps.txt | 20 +++++++ .../search/G7/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G7/protein/hsps.txt | 20 +++++++ .../outputs/search/G7/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G8/nucleotide/hsps.txt | 20 +++++++ .../search/G8/nucleotide/queries.fasta | 40 ++++++++++++++ .../outputs/search/G8/protein/hsps.txt | 20 +++++++ .../outputs/search/G8/protein/queries.fasta | 40 ++++++++++++++ .../outputs/search/G9/nucleotide/hsps.txt | 19 +++++++ .../search/G9/nucleotide/queries.fasta | 38 ++++++++++++++ .../outputs/search/G9/protein/hsps.txt | 19 +++++++ .../outputs/search/G9/protein/queries.fasta | 38 ++++++++++++++ tests/test_workflows.yml | 16 +----- 141 files changed, 3653 insertions(+), 15 deletions(-) create mode 100755 tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nog create mode 100755 
tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nog create mode 100755 
tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nog create mode 100755 tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nos create mode 100755 tests/test_data/outputs/report/conservative/G1/report.json create mode 100755 tests/test_data/outputs/report/conservative/G1/run.json create mode 100755 tests/test_data/outputs/report/conservative/G10/report.json create mode 100755 tests/test_data/outputs/report/conservative/G10/run.json create mode 100755 tests/test_data/outputs/report/conservative/G11/report.json create mode 100755 tests/test_data/outputs/report/conservative/G11/run.json create mode 100755 tests/test_data/outputs/report/conservative/G12/report.json create mode 100755 tests/test_data/outputs/report/conservative/G12/run.json create mode 100755 tests/test_data/outputs/report/conservative/G13/report.json create mode 100755 tests/test_data/outputs/report/conservative/G13/run.json create mode 100755 tests/test_data/outputs/report/conservative/G14/report.json create mode 100755 tests/test_data/outputs/report/conservative/G14/run.json create mode 100755 tests/test_data/outputs/report/conservative/G2/report.json create mode 100755 tests/test_data/outputs/report/conservative/G2/run.json create mode 100755 tests/test_data/outputs/report/conservative/G3/report.json create mode 100755 tests/test_data/outputs/report/conservative/G3/run.json create mode 100755 tests/test_data/outputs/report/conservative/G4/report.json create mode 100755 tests/test_data/outputs/report/conservative/G4/run.json create mode 100755 tests/test_data/outputs/report/conservative/G5/report.json create mode 100755 tests/test_data/outputs/report/conservative/G5/run.json create mode 100755 tests/test_data/outputs/report/conservative/G6/report.json create mode 100755 tests/test_data/outputs/report/conservative/G6/run.json create mode 100755 tests/test_data/outputs/report/conservative/G7/report.json 
create mode 100755 tests/test_data/outputs/report/conservative/G7/run.json create mode 100755 tests/test_data/outputs/report/conservative/G8/report.json create mode 100755 tests/test_data/outputs/report/conservative/G8/run.json create mode 100755 tests/test_data/outputs/report/conservative/G9/report.json create mode 100755 tests/test_data/outputs/report/conservative/G9/run.json create mode 100755 tests/test_data/outputs/report/normal/G1/report.json create mode 100755 tests/test_data/outputs/report/normal/G1/run.json create mode 100755 tests/test_data/outputs/report/normal/G10/report.json create mode 100755 tests/test_data/outputs/report/normal/G10/run.json create mode 100755 tests/test_data/outputs/report/normal/G11/report.json create mode 100755 tests/test_data/outputs/report/normal/G11/run.json create mode 100755 tests/test_data/outputs/report/normal/G12/report.json create mode 100755 tests/test_data/outputs/report/normal/G12/run.json create mode 100755 tests/test_data/outputs/report/normal/G13/report.json create mode 100755 tests/test_data/outputs/report/normal/G13/run.json create mode 100755 tests/test_data/outputs/report/normal/G14/report.json create mode 100755 tests/test_data/outputs/report/normal/G14/run.json create mode 100755 tests/test_data/outputs/report/normal/G2/report.json create mode 100755 tests/test_data/outputs/report/normal/G2/run.json create mode 100755 tests/test_data/outputs/report/normal/G3/report.json create mode 100755 tests/test_data/outputs/report/normal/G3/run.json create mode 100755 tests/test_data/outputs/report/normal/G4/report.json create mode 100755 tests/test_data/outputs/report/normal/G4/run.json create mode 100755 tests/test_data/outputs/report/normal/G5/report.json create mode 100755 tests/test_data/outputs/report/normal/G5/run.json create mode 100755 tests/test_data/outputs/report/normal/G6/report.json create mode 100755 tests/test_data/outputs/report/normal/G6/run.json create mode 100755 
tests/test_data/outputs/report/normal/G7/report.json create mode 100755 tests/test_data/outputs/report/normal/G7/run.json create mode 100755 tests/test_data/outputs/report/normal/G8/report.json create mode 100755 tests/test_data/outputs/report/normal/G8/run.json create mode 100755 tests/test_data/outputs/report/normal/G9/report.json create mode 100755 tests/test_data/outputs/report/normal/G9/run.json create mode 100755 tests/test_data/outputs/search/G1/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G1/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G1/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G1/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G10/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G10/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G10/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G10/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G11/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G11/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G11/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G11/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G12/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G12/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G12/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G12/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G13/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G13/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G13/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G13/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G14/nucleotide/hsps.txt create mode 100755 
tests/test_data/outputs/search/G14/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G14/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G14/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G2/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G2/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G2/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G2/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G3/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G3/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G3/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G3/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G4/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G4/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G4/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G4/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G5/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G5/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G5/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G5/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G6/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G6/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G6/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G6/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G7/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G7/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G7/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G7/protein/queries.fasta create mode 
100755 tests/test_data/outputs/search/G8/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G8/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G8/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G8/protein/queries.fasta create mode 100755 tests/test_data/outputs/search/G9/nucleotide/hsps.txt create mode 100755 tests/test_data/outputs/search/G9/nucleotide/queries.fasta create mode 100755 tests/test_data/outputs/search/G9/protein/hsps.txt create mode 100755 tests/test_data/outputs/search/G9/protein/queries.fasta diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G1/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G10/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nog 
b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G11/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G12/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G13/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nog 
b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G14/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G2/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G3/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nog 
b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G4/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G5/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G6/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nog 
b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G7/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G8/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nog b/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nog new file mode 100755 index 0000000000000000000000000000000000000000..e4342d3b5eb4173360f9ee6286514d5721507544 GIT binary patch literal 36 YcmZQzU|?i`02UwxV?%`)82$qR01C7O0{{R3 literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nos b/tests/test_data/outputs/extract/G9/blast_db/contigs.fasta.nos new file mode 100755 index 0000000000000000000000000000000000000000..85e7853f23040577fff3dbd41dc347f637a3be30 GIT binary patch literal 18 PcmZQ%fB+^a&1e7s0BQg= literal 0 HcmV?d00001 diff --git a/tests/test_data/outputs/report/conservative/G1/report.json 
b/tests/test_data/outputs/report/conservative/G1/report.json new file mode 100755 index 0000000..576e7d5 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G1/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G1", + "profile": { + "G1": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G1/run.json b/tests/test_data/outputs/report/conservative/G1/run.json new file mode 100755 index 0000000..f014f92 --- /dev/null +++ 
b/tests/test_data/outputs/report/conservative/G1/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:09", + "parameters": { + "command": "report", + "input": "test_dev/search/G1/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G1", + "name": "G1", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G1/report.json", + "analysis_end_time": "19/06/2024 10:44:09" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G10/report.json b/tests/test_data/outputs/report/conservative/G10/report.json new file mode 100755 index 0000000..67aa88a --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G10/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G10", + "profile": { + "G10": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "-", + "locus_12": 
"eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G10/run.json b/tests/test_data/outputs/report/conservative/G10/run.json new file mode 100755 index 0000000..717a5e5 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G10/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:27", + "parameters": { + "command": "report", + "input": "test_dev/search/G10/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G10", + "name": "G10", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G10/report.json", + "analysis_end_time": "19/06/2024 10:44:27" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G11/report.json b/tests/test_data/outputs/report/conservative/G11/report.json new file mode 100755 index 0000000..00c2a66 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G11/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 
100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G11", + "profile": { + "G11": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "-", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G11/run.json b/tests/test_data/outputs/report/conservative/G11/run.json new file mode 100755 index 0000000..ed0d9aa --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G11/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:29", + "parameters": { + "command": "report", + "input": "test_dev/search/G11/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G11", + "name": "G11", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G11/report.json", + "analysis_end_time": "19/06/2024 10:44:29" +} \ No newline at end of file diff --git 
a/tests/test_data/outputs/report/conservative/G12/report.json b/tests/test_data/outputs/report/conservative/G12/report.json new file mode 100755 index 0000000..09cadae --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G12/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G12", + "profile": { + "G12": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "-", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G12/run.json b/tests/test_data/outputs/report/conservative/G12/run.json new file mode 100755 index 
0000000..ca7f4f9 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G12/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:30", + "parameters": { + "command": "report", + "input": "test_dev/search/G12/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G12", + "name": "G12", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G12/report.json", + "analysis_end_time": "19/06/2024 10:44:31" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G13/report.json b/tests/test_data/outputs/report/conservative/G13/report.json new file mode 100755 index 0000000..04a6d16 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G13/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G13", + "profile": { + "G13": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": 
"c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "-", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G13/run.json b/tests/test_data/outputs/report/conservative/G13/run.json new file mode 100755 index 0000000..fc41622 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G13/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:32", + "parameters": { + "command": "report", + "input": "test_dev/search/G13/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G13", + "name": "G13", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G13/report.json", + "analysis_end_time": "19/06/2024 10:44:32" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G14/report.json b/tests/test_data/outputs/report/conservative/G14/report.json new file mode 100755 index 0000000..230f85f --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G14/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + 
"min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G14", + "profile": { + "G14": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "-", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G14/run.json b/tests/test_data/outputs/report/conservative/G14/run.json new file mode 100755 index 0000000..f0d7670 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G14/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:34", + "parameters": { + "command": "report", + "input": "test_dev/search/G14/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G14", + "name": "G14", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G14/report.json", + "analysis_end_time": "19/06/2024 10:44:34" +} \ No newline at end 
of file diff --git a/tests/test_data/outputs/report/conservative/G2/report.json b/tests/test_data/outputs/report/conservative/G2/report.json new file mode 100755 index 0000000..898aa88 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G2/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G2", + "profile": { + "G2": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G2/run.json 
b/tests/test_data/outputs/report/conservative/G2/run.json new file mode 100755 index 0000000..564bb8e --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G2/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:11", + "parameters": { + "command": "report", + "input": "test_dev/search/G2/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G2", + "name": "G2", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G2/report.json", + "analysis_end_time": "19/06/2024 10:44:11" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G3/report.json b/tests/test_data/outputs/report/conservative/G3/report.json new file mode 100755 index 0000000..d5ebcc6 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G3/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G3", + "profile": { + "G3": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": 
"41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G3/run.json b/tests/test_data/outputs/report/conservative/G3/run.json new file mode 100755 index 0000000..c9b3c08 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G3/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:13", + "parameters": { + "command": "report", + "input": "test_dev/search/G3/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G3", + "name": "G3", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G3/report.json", + "analysis_end_time": "19/06/2024 10:44:13" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G4/report.json b/tests/test_data/outputs/report/conservative/G4/report.json new file mode 100755 index 0000000..53f0b84 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G4/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": 
true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G4", + "profile": { + "G4": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G4/run.json b/tests/test_data/outputs/report/conservative/G4/run.json new file mode 100755 index 0000000..de3d320 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G4/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:15", + "parameters": { + "command": "report", + "input": "test_dev/search/G4/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G4", + "name": "G4", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + 
"force": true + }, + "result_file": "test_dev/report/conservative/G4/report.json", + "analysis_end_time": "19/06/2024 10:44:15" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G5/report.json b/tests/test_data/outputs/report/conservative/G5/report.json new file mode 100755 index 0000000..763afa5 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G5/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G5", + "profile": { + "G5": { + "locus_1": "e9e707ebc64e10a881f1323ebff85369", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } 
+} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G5/run.json b/tests/test_data/outputs/report/conservative/G5/run.json new file mode 100755 index 0000000..692163e --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G5/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:17", + "parameters": { + "command": "report", + "input": "test_dev/search/G5/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G5", + "name": "G5", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G5/report.json", + "analysis_end_time": "19/06/2024 10:44:17" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G6/report.json b/tests/test_data/outputs/report/conservative/G6/report.json new file mode 100755 index 0000000..685aac3 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G6/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G6", + "profile": { + "G6": { + "locus_1": "a47cc24760462371e919143c5cc81376", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": 
"dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G6/run.json b/tests/test_data/outputs/report/conservative/G6/run.json new file mode 100755 index 0000000..fc4870a --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G6/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:19", + "parameters": { + "command": "report", + "input": "test_dev/search/G6/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G6", + "name": "G6", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G6/report.json", + "analysis_end_time": "19/06/2024 10:44:19" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G7/report.json b/tests/test_data/outputs/report/conservative/G7/report.json new file mode 100755 index 0000000..f4f93b2 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G7/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using 
Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G7", + "profile": { + "G7": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "796419469778f7ec3851c813f59cfff7", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G7/run.json b/tests/test_data/outputs/report/conservative/G7/run.json new file mode 100755 index 0000000..8ce2c49 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G7/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:21", + "parameters": { + "command": "report", + "input": "test_dev/search/G7/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G7", + "name": "G7", + "mode": "conservative", + "prop": "locus_name", + 
"max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G7/report.json", + "analysis_end_time": "19/06/2024 10:44:21" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G8/report.json b/tests/test_data/outputs/report/conservative/G8/report.json new file mode 100755 index 0000000..3034d22 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G8/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G8", + "profile": { + "G8": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fd6284b58a891cf02058906c9ee37a00", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": 
"a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G8/run.json b/tests/test_data/outputs/report/conservative/G8/run.json new file mode 100755 index 0000000..fcab982 --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G8/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:23", + "parameters": { + "command": "report", + "input": "test_dev/search/G8/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G8", + "name": "G8", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G8/report.json", + "analysis_end_time": "19/06/2024 10:44:23" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G9/report.json b/tests/test_data/outputs/report/conservative/G9/report.json new file mode 100755 index 0000000..6e4728c --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G9/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "conservative", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G9", + "profile": { + "G9": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": 
"8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "-", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/conservative/G9/run.json b/tests/test_data/outputs/report/conservative/G9/run.json new file mode 100755 index 0000000..06a275b --- /dev/null +++ b/tests/test_data/outputs/report/conservative/G9/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:25", + "parameters": { + "command": "report", + "input": "test_dev/search/G9/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/conservative/G9", + "name": "G9", + "mode": "conservative", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/conservative/G9/report.json", + "analysis_end_time": "19/06/2024 10:44:25" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G1/report.json b/tests/test_data/outputs/report/normal/G1/report.json new file mode 100755 index 0000000..302aa1e --- /dev/null +++ b/tests/test_data/outputs/report/normal/G1/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + 
"db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G1", + "profile": { + "G1": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G1/run.json b/tests/test_data/outputs/report/normal/G1/run.json new file mode 100755 index 0000000..8ab37f9 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G1/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:36", + "parameters": { + "command": "report", + "input": "test_dev/search/G1/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G1", + "name": "G1", + "mode": "normal", + "prop": 
"locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G1/report.json", + "analysis_end_time": "19/06/2024 10:44:36" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G10/report.json b/tests/test_data/outputs/report/normal/G10/report.json new file mode 100755 index 0000000..8151bd5 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G10/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G10", + "profile": { + "G10": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "-", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": 
"4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G10/run.json b/tests/test_data/outputs/report/normal/G10/run.json new file mode 100755 index 0000000..8ad524d --- /dev/null +++ b/tests/test_data/outputs/report/normal/G10/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:54", + "parameters": { + "command": "report", + "input": "test_dev/search/G10/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G10", + "name": "G10", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G10/report.json", + "analysis_end_time": "19/06/2024 10:44:54" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G11/report.json b/tests/test_data/outputs/report/normal/G11/report.json new file mode 100755 index 0000000..d4b46e3 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G11/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G11", + "profile": { + "G11": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": 
"49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "60934464690fea26102d1c8c9acb755d", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G11/run.json b/tests/test_data/outputs/report/normal/G11/run.json new file mode 100755 index 0000000..739ec52 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G11/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:56", + "parameters": { + "command": "report", + "input": "test_dev/search/G11/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G11", + "name": "G11", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G11/report.json", + "analysis_end_time": "19/06/2024 10:44:56" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G12/report.json b/tests/test_data/outputs/report/normal/G12/report.json new file mode 100755 index 0000000..0944398 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G12/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + 
"db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G12", + "profile": { + "G12": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "60934464690fea26102d1c8c9acb755d", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G12/run.json b/tests/test_data/outputs/report/normal/G12/run.json new file mode 100755 index 0000000..6fd114d --- /dev/null +++ b/tests/test_data/outputs/report/normal/G12/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:57", + "parameters": { + "command": "report", + "input": "test_dev/search/G12/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G12", + "name": "G12", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + 
"translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G12/report.json", + "analysis_end_time": "19/06/2024 10:44:57" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G13/report.json b/tests/test_data/outputs/report/normal/G13/report.json new file mode 100755 index 0000000..d6418d2 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G13/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G13", + "profile": { + "G13": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "70e88b95c11c37150f37312882af5771", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + 
} +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G13/run.json b/tests/test_data/outputs/report/normal/G13/run.json new file mode 100755 index 0000000..aab951f --- /dev/null +++ b/tests/test_data/outputs/report/normal/G13/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:59", + "parameters": { + "command": "report", + "input": "test_dev/search/G13/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G13", + "name": "G13", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G13/report.json", + "analysis_end_time": "19/06/2024 10:44:59" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G14/report.json b/tests/test_data/outputs/report/normal/G14/report.json new file mode 100755 index 0000000..4931ce2 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G14/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G14", + "profile": { + "G14": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": 
"7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "70e88b95c11c37150f37312882af5771", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G14/run.json b/tests/test_data/outputs/report/normal/G14/run.json new file mode 100755 index 0000000..9d19db2 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G14/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:45:01", + "parameters": { + "command": "report", + "input": "test_dev/search/G14/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G14", + "name": "G14", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G14/report.json", + "analysis_end_time": "19/06/2024 10:45:01" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G2/report.json b/tests/test_data/outputs/report/normal/G2/report.json new file mode 100755 index 0000000..ff8162e --- /dev/null +++ b/tests/test_data/outputs/report/normal/G2/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": 
true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G2", + "profile": { + "G2": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G2/run.json b/tests/test_data/outputs/report/normal/G2/run.json new file mode 100755 index 0000000..0ed17d4 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G2/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:38", + "parameters": { + "command": "report", + "input": "test_dev/search/G2/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G2", + "name": "G2", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": 
"test_dev/report/normal/G2/report.json", + "analysis_end_time": "19/06/2024 10:44:38" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G3/report.json b/tests/test_data/outputs/report/normal/G3/report.json new file mode 100755 index 0000000..cab7987 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G3/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G3", + "profile": { + "G3": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git 
a/tests/test_data/outputs/report/normal/G3/run.json b/tests/test_data/outputs/report/normal/G3/run.json new file mode 100755 index 0000000..9f47ca0 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G3/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:40", + "parameters": { + "command": "report", + "input": "test_dev/search/G3/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G3", + "name": "G3", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G3/report.json", + "analysis_end_time": "19/06/2024 10:44:40" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G4/report.json b/tests/test_data/outputs/report/normal/G4/report.json new file mode 100755 index 0000000..a08424a --- /dev/null +++ b/tests/test_data/outputs/report/normal/G4/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G4", + "profile": { + "G4": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": 
"41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G4/run.json b/tests/test_data/outputs/report/normal/G4/run.json new file mode 100755 index 0000000..a2d8f5a --- /dev/null +++ b/tests/test_data/outputs/report/normal/G4/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:42", + "parameters": { + "command": "report", + "input": "test_dev/search/G4/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G4", + "name": "G4", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G4/report.json", + "analysis_end_time": "19/06/2024 10:44:42" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G5/report.json b/tests/test_data/outputs/report/normal/G5/report.json new file mode 100755 index 0000000..43ebfe1 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G5/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + 
"protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G5", + "profile": { + "G5": { + "locus_1": "e9e707ebc64e10a881f1323ebff85369", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G5/run.json b/tests/test_data/outputs/report/normal/G5/run.json new file mode 100755 index 0000000..331aa5c --- /dev/null +++ b/tests/test_data/outputs/report/normal/G5/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:44", + "parameters": { + "command": "report", + "input": "test_dev/search/G5/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G5", + "name": "G5", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G5/report.json", + 
"analysis_end_time": "19/06/2024 10:44:44" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G6/report.json b/tests/test_data/outputs/report/normal/G6/report.json new file mode 100755 index 0000000..2bf9535 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G6/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G6", + "profile": { + "G6": { + "locus_1": "a47cc24760462371e919143c5cc81376", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fe04d17ec353c08b903c85fc0ca4dc02", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G6/run.json 
b/tests/test_data/outputs/report/normal/G6/run.json new file mode 100755 index 0000000..6c05ad9 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G6/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:46", + "parameters": { + "command": "report", + "input": "test_dev/search/G6/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G6", + "name": "G6", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G6/report.json", + "analysis_end_time": "19/06/2024 10:44:46" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G7/report.json b/tests/test_data/outputs/report/normal/G7/report.json new file mode 100755 index 0000000..65f3426 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G7/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G7", + "profile": { + "G7": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": 
"796419469778f7ec3851c813f59cfff7", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G7/run.json b/tests/test_data/outputs/report/normal/G7/run.json new file mode 100755 index 0000000..920bc8b --- /dev/null +++ b/tests/test_data/outputs/report/normal/G7/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:48", + "parameters": { + "command": "report", + "input": "test_dev/search/G7/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G7", + "name": "G7", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G7/report.json", + "analysis_end_time": "19/06/2024 10:44:48" +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G8/report.json b/tests/test_data/outputs/report/normal/G8/report.json new file mode 100755 index 0000000..2fbc2c5 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G8/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": 
"normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G8", + "profile": { + "G8": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "e35184c8ff18e9116fc8faef20532f56", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "ac1b21798c0f672ad26f5a91ea278590", + "locus_5": "d00defcca8588f21ce16fa1d0ac13389", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "dc94bf1ec4ff9bed2a1f460cbd958656", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "fd6284b58a891cf02058906c9ee37a00", + "locus_11": "5b128d659955716833ce42f2bb060212", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "b9060019038526aa6fc38d2f7510edc6", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "16e55766c603fe33c9e75d8e81743ae2", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "a012eee23637b48e39b00808a057e35d", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G8/run.json b/tests/test_data/outputs/report/normal/G8/run.json new file mode 100755 index 0000000..9c2d841 --- /dev/null +++ b/tests/test_data/outputs/report/normal/G8/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:50", + "parameters": { + "command": "report", + "input": "test_dev/search/G8/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G8", + "name": "G8", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G8/report.json", + "analysis_end_time": "19/06/2024 10:44:50" +} \ No newline at end of 
file diff --git a/tests/test_data/outputs/report/normal/G9/report.json b/tests/test_data/outputs/report/normal/G9/report.json new file mode 100755 index 0000000..02bfd4c --- /dev/null +++ b/tests/test_data/outputs/report/normal/G9/report.json @@ -0,0 +1,49 @@ +{ + "db_info": { + "db_name": "Locidex Test Database", + "db_version": "1.0.0", + "db_date": "2024/06/06", + "db_author": "James Robertson", + "db_desc": "Using Salmonella GCA_000007545.2 as source data", + "db_num_seqs": 20, + "is_nucl": true, + "is_prot": true, + "nucleotide_db_name": "nucleotide", + "protein_db_name": "protein" + }, + "parameters": { + "mode": "normal", + "min_match_ident": 100, + "min_match_cov": 100, + "max_ambiguous": 0, + "max_internal_stops": 0 + }, + "data": { + "sample_name": "G9", + "profile": { + "G9": { + "locus_1": "d17b02d12afa7f832ee37df6f24a8f55", + "locus_2": "8b70e777f6bbf2c91ff75947824b5976", + "locus_3": "670705cd2a59c4a23a897ac656a888fe", + "locus_4": "73790840c76943caac0ebb3b2b3f0b98", + "locus_5": "8cf4341689dd00f74adfcc43d1f4a35e", + "locus_6": "a11561f2804e2c32c78049f8b9aeb517", + "locus_7": "49d9878c9d3071aa1d2f26cb947b784c", + "locus_8": "7ebe74afecf146ec4db816c8deced64f", + "locus_9": "41ebb36872854b2b33c8c028e23d8ad1", + "locus_10": "-", + "locus_11": "c4266f2f24fdd8e039113c6b0955af9f", + "locus_12": "eb72da68c159497d5f0c8eeddc51b5ae", + "locus_13": "8f300259dcb46224bdc1fe5273107324", + "locus_14": "2fa0b06ed72e36b4071cab9d0b4f87d0", + "locus_15": "bc98c2fe196a68a79036814396513a8d", + "locus_16": "a9b3cb97dac3cda6e932a49bf9a507bd", + "locus_17": "a0d97d985483413f3c18bfe5833ae9ce", + "locus_18": "b3021e979faa7600756c06dfadfcf14c", + "locus_19": "de32372598811d63bcc1a0eaf6872644", + "locus_20": "4461918e985715e4a2b07494e1f91326" + } + }, + "seq_data": {} + } +} \ No newline at end of file diff --git a/tests/test_data/outputs/report/normal/G9/run.json b/tests/test_data/outputs/report/normal/G9/run.json new file mode 100755 index 0000000..d6fa8d0 --- /dev/null +++ 
b/tests/test_data/outputs/report/normal/G9/run.json @@ -0,0 +1,21 @@ +{ + "analysis_start_time": "19/06/2024 10:44:52", + "parameters": { + "command": "report", + "input": "test_dev/search/G9/seq_store.json", + "fasta": null, + "config": null, + "outdir": "test_dev/report/normal/G9", + "name": "G9", + "mode": "normal", + "prop": "locus_name", + "max_ambig": 0, + "max_stop": 0, + "match_ident": 100, + "match_cov": 100, + "translation_table": 11, + "force": true + }, + "result_file": "test_dev/report/normal/G9/report.json", + "analysis_end_time": "19/06/2024 10:44:52" +} \ No newline at end of file diff --git a/tests/test_data/outputs/search/G1/nucleotide/hsps.txt b/tests/test_data/outputs/search/G1/nucleotide/hsps.txt new file mode 100755 index 0000000..cdab2ba --- /dev/null +++ b/tests/test_data/outputs/search/G1/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 
15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G1/nucleotide/queries.fasta b/tests/test_data/outputs/search/G1/nucleotide/queries.fasta new file mode 100755 index 0000000..ce3c3a9 --- /dev/null +++ b/tests/test_data/outputs/search/G1/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G1/protein/hsps.txt b/tests/test_data/outputs/search/G1/protein/hsps.txt new file mode 100755 index 0000000..233979d --- /dev/null +++ b/tests/test_data/outputs/search/G1/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G1/protein/queries.fasta b/tests/test_data/outputs/search/G1/protein/queries.fasta new file mode 100755 index 0000000..3ac162b --- /dev/null +++ b/tests/test_data/outputs/search/G1/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G10/nucleotide/hsps.txt b/tests/test_data/outputs/search/G10/nucleotide/hsps.txt new file mode 100755 index 0000000..d91fbd6 --- /dev/null +++ b/tests/test_data/outputs/search/G10/nucleotide/hsps.txt @@ -0,0 +1,19 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +3 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +4 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +5 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +6 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +7 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +8 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +9 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +10 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +11 19 4935 
4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +12 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +13 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +14 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +15 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +16 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +17 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +18 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G10/nucleotide/queries.fasta b/tests/test_data/outputs/search/G10/nucleotide/queries.fasta new file mode 100755 index 0000000..2bab250 --- /dev/null +++ b/tests/test_data/outputs/search/G10/nucleotide/queries.fasta @@ -0,0 +1,38 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>13 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>14 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G10/protein/hsps.txt b/tests/test_data/outputs/search/G10/protein/hsps.txt new file mode 100755 index 0000000..e6894a0 --- /dev/null +++ b/tests/test_data/outputs/search/G10/protein/hsps.txt @@ -0,0 +1,19 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +3 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +4 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +5 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +6 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +7 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +8 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +9 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +10 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +11 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +12 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +13 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +14 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +15 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +16 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +17 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +18 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G10/protein/queries.fasta b/tests/test_data/outputs/search/G10/protein/queries.fasta new file mode 100755 index 0000000..4396cd1 --- /dev/null +++ b/tests/test_data/outputs/search/G10/protein/queries.fasta @@ -0,0 +1,38 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 +MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* 
+>2 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>3 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>4 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>5 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>6 
+MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>7 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>8 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>9 
+MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>10 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>11 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKR
RAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>12 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>13 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>14 +MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>15 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>16 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>17 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>18 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G11/nucleotide/hsps.txt b/tests/test_data/outputs/search/G11/nucleotide/hsps.txt new file mode 100755 index 0000000..ee96839 --- /dev/null +++ b/tests/test_data/outputs/search/G11/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 
100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +9 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G11/nucleotide/queries.fasta b/tests/test_data/outputs/search/G11/nucleotide/queries.fasta new file mode 100755 index 0000000..1e6d0ea --- /dev/null +++ b/tests/test_data/outputs/search/G11/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 
+atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G11/protein/hsps.txt b/tests/test_data/outputs/search/G11/protein/hsps.txt new file mode 100755 index 0000000..f9da50e --- /dev/null +++ b/tests/test_data/outputs/search/G11/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +12 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +16 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G11/protein/queries.fasta b/tests/test_data/outputs/search/G11/protein/queries.fasta new file mode 100755 index 0000000..7d902a9 --- /dev/null +++ b/tests/test_data/outputs/search/G11/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>16 +MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G12/nucleotide/hsps.txt b/tests/test_data/outputs/search/G12/nucleotide/hsps.txt new file mode 100755 index 0000000..9ebac93 --- /dev/null +++ b/tests/test_data/outputs/search/G12/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +9 16 
1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +12 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +16 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G12/nucleotide/queries.fasta b/tests/test_data/outputs/search/G12/nucleotide/queries.fasta new file mode 100755 index 0000000..80602ca --- /dev/null +++ b/tests/test_data/outputs/search/G12/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>5 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G12/protein/hsps.txt b/tests/test_data/outputs/search/G12/protein/hsps.txt new file mode 100755 index 0000000..553fd81 --- /dev/null +++ b/tests/test_data/outputs/search/G12/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +12 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +16 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G12/protein/queries.fasta b/tests/test_data/outputs/search/G12/protein/queries.fasta new file mode 100755 index 0000000..a75c35c --- /dev/null +++ b/tests/test_data/outputs/search/G12/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>16 +MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G13/nucleotide/hsps.txt b/tests/test_data/outputs/search/G13/nucleotide/hsps.txt new file mode 100755 index 0000000..a515bcf --- /dev/null +++ b/tests/test_data/outputs/search/G13/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +9 
16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +12 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +16 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G13/nucleotide/queries.fasta b/tests/test_data/outputs/search/G13/nucleotide/queries.fasta new file mode 100755 index 0000000..00e5ec3 --- /dev/null +++ b/tests/test_data/outputs/search/G13/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>16 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G13/protein/hsps.txt b/tests/test_data/outputs/search/G13/protein/hsps.txt new file mode 100755 index 0000000..b0cf610 --- /dev/null +++ b/tests/test_data/outputs/search/G13/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +12 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +16 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G13/protein/queries.fasta b/tests/test_data/outputs/search/G13/protein/queries.fasta new file mode 100755 index 0000000..7cb4274 --- /dev/null +++ b/tests/test_data/outputs/search/G13/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>16 +MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G14/nucleotide/hsps.txt b/tests/test_data/outputs/search/G14/nucleotide/hsps.txt new file mode 100755 index 0000000..3324a8b --- /dev/null +++ b/tests/test_data/outputs/search/G14/nucleotide/hsps.txt @@ -0,0 +1,21 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +6 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +7 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +8 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +9 
16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +10 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +11 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +12 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +13 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +14 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +15 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +16 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +17 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +18 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +19 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +20 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G14/nucleotide/queries.fasta b/tests/test_data/outputs/search/G14/nucleotide/queries.fasta new file mode 100755 index 0000000..2da4d02 --- /dev/null +++ b/tests/test_data/outputs/search/G14/nucleotide/queries.fasta @@ -0,0 +1,42 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>6 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>7 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>8 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>9 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>10 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>11 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>12 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>13 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>14 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>15 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>16 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>17 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>18 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>19 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>20 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G14/protein/hsps.txt b/tests/test_data/outputs/search/G14/protein/hsps.txt new file mode 100755 index 0000000..6423de4 --- /dev/null +++ b/tests/test_data/outputs/search/G14/protein/hsps.txt @@ -0,0 +1,21 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +6 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +7 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +8 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +9 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +10 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +11 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +12 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +13 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +14 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +15 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +16 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +17 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +18 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +19 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +20 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G14/protein/queries.fasta b/tests/test_data/outputs/search/G14/protein/queries.fasta new file mode 100755 index 0000000..285e5c8 --- /dev/null +++ b/tests/test_data/outputs/search/G14/protein/queries.fasta @@ -0,0 +1,42 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>6 
+LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>7 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>8 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>9 
+MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>10 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>11 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>12 
+MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>13 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>14 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>15 
+LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>16 +MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>17 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>18 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>19 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>20 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G2/nucleotide/hsps.txt b/tests/test_data/outputs/search/G2/nucleotide/hsps.txt new file mode 100755 index 0000000..00093e5 --- /dev/null +++ b/tests/test_data/outputs/search/G2/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 
1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G2/nucleotide/queries.fasta b/tests/test_data/outputs/search/G2/nucleotide/queries.fasta new file mode 100755 index 0000000..9cad9a4 --- /dev/null +++ b/tests/test_data/outputs/search/G2/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G2/protein/hsps.txt b/tests/test_data/outputs/search/G2/protein/hsps.txt new file mode 100755 index 0000000..73033ac --- /dev/null +++ b/tests/test_data/outputs/search/G2/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G2/protein/queries.fasta b/tests/test_data/outputs/search/G2/protein/queries.fasta new file mode 100755 index 0000000..34499a1 --- /dev/null +++ b/tests/test_data/outputs/search/G2/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G3/nucleotide/hsps.txt b/tests/test_data/outputs/search/G3/nucleotide/hsps.txt new file mode 100755 index 0000000..cdab2ba --- /dev/null +++ b/tests/test_data/outputs/search/G3/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 
285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G3/nucleotide/queries.fasta b/tests/test_data/outputs/search/G3/nucleotide/queries.fasta new file mode 100755 index 0000000..ce3c3a9 --- /dev/null +++ b/tests/test_data/outputs/search/G3/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G3/protein/hsps.txt b/tests/test_data/outputs/search/G3/protein/hsps.txt new file mode 100755 index 0000000..233979d --- /dev/null +++ b/tests/test_data/outputs/search/G3/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G3/protein/queries.fasta b/tests/test_data/outputs/search/G3/protein/queries.fasta new file mode 100755 index 0000000..3ac162b --- /dev/null +++ b/tests/test_data/outputs/search/G3/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G4/nucleotide/hsps.txt b/tests/test_data/outputs/search/G4/nucleotide/hsps.txt new file mode 100755 index 0000000..00093e5 --- /dev/null +++ b/tests/test_data/outputs/search/G4/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 
285 1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G4/nucleotide/queries.fasta b/tests/test_data/outputs/search/G4/nucleotide/queries.fasta new file mode 100755 index 0000000..9cad9a4 --- /dev/null +++ b/tests/test_data/outputs/search/G4/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G4/protein/hsps.txt b/tests/test_data/outputs/search/G4/protein/hsps.txt new file mode 100755 index 0000000..73033ac --- /dev/null +++ b/tests/test_data/outputs/search/G4/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G4/protein/queries.fasta b/tests/test_data/outputs/search/G4/protein/queries.fasta new file mode 100755 index 0000000..34499a1 --- /dev/null +++ b/tests/test_data/outputs/search/G4/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G5/nucleotide/hsps.txt b/tests/test_data/outputs/search/G5/nucleotide/hsps.txt new file mode 100755 index 0000000..d8538d1 --- /dev/null +++ b/tests/test_data/outputs/search/G5/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 103 1 97.087 100 100 plus 1.81e-46 172 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 
1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G5/nucleotide/queries.fasta b/tests/test_data/outputs/search/G5/nucleotide/queries.fasta new file mode 100755 index 0000000..306e3c9 --- /dev/null +++ b/tests/test_data/outputs/search/G5/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtactgaacaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G5/protein/hsps.txt b/tests/test_data/outputs/search/G5/protein/hsps.txt new file mode 100755 index 0000000..823a08e --- /dev/null +++ b/tests/test_data/outputs/search/G5/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 2 94.118 100 100 N/A 2.04e-17 57.8 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G5/protein/queries.fasta b/tests/test_data/outputs/search/G5/protein/queries.fasta new file mode 100755 index 0000000..46a4b87 --- /dev/null +++ b/tests/test_data/outputs/search/G5/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MY*TPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G6/nucleotide/hsps.txt b/tests/test_data/outputs/search/G6/nucleotide/hsps.txt new file mode 100755 index 0000000..8e411cd --- /dev/null +++ b/tests/test_data/outputs/search/G6/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 2 98.039 100 100 plus 3.90e-48 178 +1 9 762 762 1 762 1 762 762 0 100.000 100 100 plus 0.0 1408 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 
1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G6/nucleotide/queries.fasta b/tests/test_data/outputs/search/G6/nucleotide/queries.fasta new file mode 100755 index 0000000..26e0ed8 --- /dev/null +++ b/tests/test_data/outputs/search/G6/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtgattcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +atgcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G6/protein/hsps.txt b/tests/test_data/outputs/search/G6/protein/hsps.txt new file mode 100755 index 0000000..f47c1c9 --- /dev/null +++ b/tests/test_data/outputs/search/G6/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 1 97.059 100 100 N/A 1.28e-18 60.8 +1 9 254 254 1 254 1 254 254 0 100.000 100 100 N/A 0.0 514 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G6/protein/queries.fasta b/tests/test_data/outputs/search/G6/protein/queries.fasta new file mode 100755 index 0000000..9f022f4 --- /dev/null +++ b/tests/test_data/outputs/search/G6/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITAS*FAIFIIIVVSVLLLEGD* +>1 
+MRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G7/nucleotide/hsps.txt b/tests/test_data/outputs/search/G7/nucleotide/hsps.txt new file mode 100755 index 0000000..61d17b6 --- /dev/null +++ b/tests/test_data/outputs/search/G7/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 3 762 3 762 760 0 100.000 99 99 plus 0.0 1404 +2 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +11 1 285 285 1 285 
1 285 285 17 94.035 100 100 plus 2.31e-124 433 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +15 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G7/nucleotide/queries.fasta b/tests/test_data/outputs/search/G7/nucleotide/queries.fasta new file mode 100755 index 0000000..b762e62 --- /dev/null +++ b/tests/test_data/outputs/search/G7/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +gggcgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>15 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G7/protein/hsps.txt b/tests/test_data/outputs/search/G7/protein/hsps.txt new file mode 100755 index 0000000..194fd46 --- /dev/null +++ b/tests/test_data/outputs/search/G7/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 2 254 2 254 253 0 100.000 99 99 N/A 0.0 512 +2 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +11 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +15 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G7/protein/queries.fasta b/tests/test_data/outputs/search/G7/protein/queries.fasta new file mode 100755 index 0000000..cad8adb --- /dev/null +++ b/tests/test_data/outputs/search/G7/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+GRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>15 
+MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G8/nucleotide/hsps.txt b/tests/test_data/outputs/search/G8/nucleotide/hsps.txt new file mode 100755 index 0000000..ea86f3a --- /dev/null +++ b/tests/test_data/outputs/search/G8/nucleotide/hsps.txt @@ -0,0 +1,20 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 9 762 762 4 762 4 762 759 0 100.000 99 99 plus 0.0 1402 +2 10 858 858 1 858 1 858 858 0 100.000 100 100 plus 0.0 1585 +3 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +4 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +5 13 1281 1281 1 1281 1 1281 1281 0 100.000 100 100 plus 0.0 2366 +6 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +7 15 1464 1464 1 1464 1 1464 1464 0 100.000 100 100 plus 0.0 2704 +8 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +9 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +10 18 2037 2037 1 2037 1 2037 2037 0 100.000 100 100 plus 0.0 3762 +11 1 285 285 1 285 
1 285 285 0 100.000 100 100 plus 1.03e-152 527 +12 19 4935 4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +13 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +14 3 417 417 1 417 1 417 417 0 100.000 100 100 plus 0.0 771 +15 4 444 444 1 444 1 444 444 0 100.000 100 100 plus 0.0 821 +16 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +17 6 606 606 1 606 1 606 606 0 100.000 100 100 plus 0.0 1120 +18 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +19 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G8/nucleotide/queries.fasta b/tests/test_data/outputs/search/G8/nucleotide/queries.fasta new file mode 100755 index 0000000..232dea5 --- /dev/null +++ b/tests/test_data/outputs/search/G8/nucleotide/queries.fasta @@ -0,0 +1,40 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +aaacgcatattgctgagtaacgatgacggggttcacgcgcccggtatacaaacgctggcgaaagcgctgcgtgagtttgctgatgtacaggtcgtagccccggatcgtaaccgcagcggcgcgtctaattccctcacgctggaatcttcgcttcgtacttttacctttgataatggcgatatcgctgtacagatggggacgccgaccgattgcgtctatctgggcgttaatgccttaatgcgtccgcgtccggatattgtcgtttccggtattaacgcgggtccgaatctgggcgatgatgtgatctattccggtactgtcgccgcggcgatggaaggtcgtcatctcggctttccggcattagcggtctcccttaacggctatcagcattatgatacggctgcagccgtgacttgcgcgcttttgcgagggttaagccgggagccgttgcgtaccgggcgtattctcaacgtgaatgtcccggatctaccgttagcgcaggttaaaggcatccgcgtgactcgctgcggtagccgccatccagcggataaagtgatcccgcaggaagatccacgcggtaatacattgtactggattggcccgccgggtgataaatacgatgccgggccggataccgatttcgcggcggtggatgaaggctacgtctccgtcacgccgttgcatgtggatttaaccgcgcacagcgcgcatgatgtggtttcagactggttagacagcgtgggagttggcacgcaatggtaa +>2 
+gtgaacctggtgaaaaccccgggtctgcacgcggcgggcaaaggcattaacgttgccaatgtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccaacttcaactttcccggctttgacgtgactcccgcagactgggaacgctttgttaacgacttcctgagctggctgggtcagttcgatatggtctgcgtaagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatcgttgatactagccgtgtagagttagtcgccggtcttatagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcccgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgcgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggaaaataccgtcggcgcaggcggttccatggttggcggatggatttacggcctgctgatgcgcgagtccaccgaacatacgctgcgcctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>3 +atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>4 
+atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtgccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>5 
+ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctattacgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccagcgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcggaaaggcgggattggttcgcgcggtccgggtgaaacccagctcgaagccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgagaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgcagctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaattccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtccgcaggtacgaatgccgattgtcgactggcgtcgcttctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>6 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>7 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctcgtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatctgcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacggtgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcactcaaccggaagggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaaggctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgatgcgctgtcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtaagctgatcagccttgtgcgcaaccttaatcaactgctcaaaagcgagcacgaacgttataacaaataccgcacgagcctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatctgcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcatagcccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttctggccgacagcatcccacacagaaagaggaataa +>8 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>9 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>10 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacatcgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccacagtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggtcgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattacttgaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcatacagggcggtacggaagtttgtcagcaaaaactggataacatgcgcaacgcaggcgtaaaagtgtacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggggtcagcgcagagctcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacagaagaccggcaagctcggcgagatcctgttctttatgcgggcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccagcaatgactggcagttcgacggcgacgcggaaactattgcacactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgagatttccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>11 +atgaaactttttctgacaacggcagcgctaactgcgaccttgacttccggcatgggatttgccagcgataccgtgatcccctgggcgactaacagtggcggtacggaaagcacgcatatcgcggcaatgggcgaggatgtgaatgcgcagcatcagcacatcatccacacgcatgaaggcgtatgtgccgcaaattccggcaccattcaggctgatgaagccgcgctgaccagcaacaagccaccggtacaagtccaaccggaattgttgccgcatcagggctaa +>12 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>13 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>14 
+ctgaaactaatacctttttatttgctcgcgctattttctgcctcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaacccctgcagataacccgccttataccgtgacaatattgccgccagtagaagcctgcggggaaaaatgtgtgaaactgaacgtagagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaacgacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctacaaccgacagctgcaattacttagggctggcgtaataccgaaaaataaaaagtggaaatggataacgaagatggaaaattactggttcaggcgggtaaaagcgcactaa +>15 +atgaaaaaatttgatagctgtcatcctgtctttttactgatcggctgtgcccaggtgcctctccgttcctccgtgagcaaaccggtacagcaacctagcgctcagaaagagcaactggccaacgcaaatggtattgatgagtgtcagtcacttccgtatgtgccttcagaccttgcgaagaataaatcattatcaaaccagatcgctgataataccgcatcaaaaaatagcgcaatcagctcacgcattttttgcgaaaaatataagcaaaccaaagaacaggcgcttaccttcttccaggaacttccacaatacatgcgttcgaaagaagttgaagagcaacacatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>16 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>17 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggactcacaacatgcctgtagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgccagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcgccatccagtcggcaacctggttttcggcaggcaggtcgcgtagggcggttaaaaaaatgatggattgcggcagcggtatttattcttcaatcaatacgttactgaaaaattcgcagattaaaaatatagtcattttaacccataatcattgtttgacgtatattgtaaaaaataagcgcggcgtgaagtttgatccggagtatcttaatgcgctcgttatgtatgcggaaaacggaaaactccttttagacggtgaatttgttcccggctag +>18 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>19 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G8/protein/hsps.txt b/tests/test_data/outputs/search/G8/protein/hsps.txt new file mode 100755 index 0000000..4b86eab --- /dev/null +++ b/tests/test_data/outputs/search/G8/protein/hsps.txt @@ -0,0 +1,20 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 9 254 254 2 254 2 254 253 0 100.000 99 99 N/A 0.0 512 +2 10 286 286 1 286 1 286 286 0 100.000 100 100 N/A 0.0 579 +3 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +4 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +5 13 427 427 1 427 1 427 427 0 100.000 100 100 N/A 0.0 869 +6 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +7 15 488 488 1 488 1 488 488 0 100.000 100 100 N/A 0.0 1004 +8 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +9 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +10 18 679 679 1 679 1 679 679 0 100.000 100 100 N/A 0.0 1419 +11 1 95 95 1 95 1 95 95 0 100.000 100 100 N/A 1.23e-68 191 +12 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +13 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +14 3 139 139 1 139 1 139 139 0 100.000 100 100 N/A 2.34e-106 290 +15 4 148 148 1 148 1 148 148 0 100.000 100 100 N/A 1.04e-110 302 +16 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +17 6 202 202 1 202 1 202 202 0 100.000 100 100 N/A 3.71e-154 416 +18 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +19 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G8/protein/queries.fasta b/tests/test_data/outputs/search/G8/protein/queries.fasta new file mode 100755 index 0000000..cb18c87 --- /dev/null +++ b/tests/test_data/outputs/search/G8/protein/queries.fasta @@ -0,0 +1,40 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+KRILLSNDDGVHAPGIQTLAKALREFADVQVVAPDRNRSGASNSLTLESSLRTFTFDNGDIAVQMGTPTDCVYLGVNALMRPRPDIVVSGINAGPNLGDDVIYSGTVAAAMEGRHLGFPALAVSLNGYQHYDTAAAVTCALLRGLSREPLRTGRILNVNVPDLPLAQVKGIRVTRCGSRHPADKVIPQEDPRGNTLYWIGPPGDKYDAGPDTDFAAVDEGYVSVTPLHVDLTAHSAHDVVSDWLDSVGVGTQW* +>2 +VNLVKTPGLHAAGKGINVANVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTNFNFPGFDVTPADWERFVNDFLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIVDTSRVELVAGLIAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAPALREQGIAHVVISLGAEGALRVNASGEWIAKPPAVDVENTVGAGGSMVGGWIYGLLMRESTEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>3 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>4 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>5 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVYYAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERRKGGIGSRGPGETQLEADRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATQLLHVVDAADVRVQENIEAVNTVLEEIDAHEFPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSPQVRMPIVDWRRFCKQEPALIEYVI* +>6 
+VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>7 +MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKICVELPENLDMQSPTMTVIYDETGKLLWTQRNIPWLIKSTQPEGLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKGSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIDALSREVRELEDHHREMLNPETTRKLISLVRNLNQLLKSEHERYNKYRTSLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYLRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIAHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVSGRQHPTQKEE* +>8 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>9 
+MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>10 +MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYLTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDNMRNAGVKVYGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAELMHNAWPALWAKCNYEALQKTGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPSNDWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEISVDAPIGKPPVFYRAKSEWALLFASLRNI* +>11 +MKLFLTTAALTATLTSGMGFASDTVIPWATNSGGTESTHIAAMGEDVNAQHQHIIHTHEGVCAANSGTIQADEAALTSNKPPVQVQPELLPHQG* +>12 
+MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKRRAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>13 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>14 +LKLIPFYLLALFSASSGATEINACKDLIGTWKTPADNPPYTVTILPPVEACGEKCVKLNVEYELDVTHRNALYCHERQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLRAGVIPKNKKWKWITKMENYWFRRVKAH* +>15 
+MKKFDSCHPVFLLIGCAQVPLRSSVSKPVQQPSAQKEQLANANGIDECQSLPYVPSDLAKNKSLSNQIADNTASKNSAISSRIFCEKYKQTKEQALTFFQELPQYMRSKEVEEQHMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>16 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>17 +VLAFTLRFIKNKRYFAILAGALVIIAGLDSQHACSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLPDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRAIQSATWFSAGRSRRAVKKMMDCGSGIYSSINTLLKNSQIKNIVILTHNHCLTYIVKNKRGVKFDPEYLNALVMYAENGKLLLDGEFVPG* +>18 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>19 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_data/outputs/search/G9/nucleotide/hsps.txt b/tests/test_data/outputs/search/G9/nucleotide/hsps.txt new file mode 100755 index 0000000..6654f25 --- /dev/null +++ b/tests/test_data/outputs/search/G9/nucleotide/hsps.txt @@ -0,0 +1,19 @@ +0 0 102 102 1 102 1 102 102 0 100.000 100 100 plus 1.80e-51 189 +1 10 858 858 1 858 1 858 858 19 97.786 100 100 plus 0.0 1480 +2 11 972 972 1 972 1 972 972 0 100.000 100 100 plus 0.0 1796 +3 12 1098 1098 1 1098 1 1098 1098 0 100.000 100 100 plus 0.0 2028 +4 13 1281 1281 1 1281 1 1281 1281 11 99.141 100 100 plus 0.0 2305 +5 14 1434 1434 1 1434 1 1434 1434 0 100.000 100 100 plus 0.0 2649 +6 15 1464 1464 1 1464 1 1464 1464 15 98.975 100 100 plus 0.0 2621 +7 16 1836 1836 1 1836 1 1836 1836 0 100.000 100 100 plus 0.0 3391 +8 17 1914 1914 1 1914 1 1914 1914 0 100.000 100 100 plus 0.0 3535 +9 18 2037 2037 1 2037 1 2037 2037 16 99.215 100 100 plus 0.0 3674 +10 1 285 285 1 285 1 285 285 17 94.035 100 100 plus 2.31e-124 433 +11 19 4935 
4935 1 4935 1 4935 4935 0 100.000 100 100 plus 0.0 9114 +12 2 327 327 1 327 1 327 327 0 100.000 100 100 plus 5.34e-176 604 +13 3 417 417 1 417 1 417 417 11 97.362 100 100 plus 0.0 710 +14 4 444 444 1 444 1 444 444 15 96.622 100 100 plus 0.0 737 +15 5 543 543 1 543 1 543 543 0 100.000 100 100 plus 0.0 1003 +16 6 606 606 1 606 1 606 606 15 97.525 100 100 plus 0.0 1037 +17 7 642 642 1 642 1 642 642 0 100.000 100 100 plus 0.0 1186 +18 8 684 684 1 684 1 684 684 0 100.000 100 100 plus 0.0 1264 diff --git a/tests/test_data/outputs/search/G9/nucleotide/queries.fasta b/tests/test_data/outputs/search/G9/nucleotide/queries.fasta new file mode 100755 index 0000000..c89bc4d --- /dev/null +++ b/tests/test_data/outputs/search/G9/nucleotide/queries.fasta @@ -0,0 +1,38 @@ +>0 +atgtacgatccaccgtttcttgaagcgctgatgataacagcgtcgtttttcgccatttttatcattattgttgtgtctgtattgctccttgaaggggactaa +>1 +gtgaacctggtgaaaaccacgggtctgcacgcggcgggcaaaggcattaacgttgccaaagtgctgaaagacttaggcatcgacgtcactgtcggcggttttctcggtaaagataaccaggacggttttcagcaattattcagcgaactgggtatcgctaaccgctttcaggtggttcagggccggacccgcatcaacgtgaagctgacggaaaaagatggcgaagtgaccgacttcaacttttccggctttgacgtgactcccgcagactgggaacgctttgttaacgactccctgagctggctgggtcagttcgatatggtctgcgtcagcggtagcttaccggctggcgtgagtccggaagcgttcaccgactggatgacgcgtctgcgcagccagtgtccatgcattatctttgatagtagccgtgaagcgttagtcgccggtcttaaagctgcgccgtggctggtgaaaccgaatcgccgcgaactggaaatttgggcgggtcgtaagctcccggaaatgaaagatgtgattgatgcggcgcacgcgttacgcgaacagggtatcgctcatgtggtgatttcgctgggggcggaaggcgcgctgtgggttaacgcctcaggagaatggatcgctaaaccaccggcggttgacgtggtaagtaccgtcggcgcaggcgattccatggttggcggattgatttacggcctgctgatgcgcgagtccagcgaacatacgctgcgtctggcgacggcggttgccgcgctggcggtcagccagagcaatgtgggtattaccgatcgtcctcagttggccgcaatgatggcgcgtgtcgacttacaaccgtttaactga +>2 
+atgaacaagaataagtactcaacccccttactgatgcttgccaccatcttagccggtatgctttccccgatgcagtctgccgttaatgggcagttaggacattggttacaagacggtaatgcttgcgcagtaatctcgtttgccagcggtctggtcgtcatgttttttattattattgcgcgcaaagaaacacgccagcaattcgcctcaatccctactctgattaaaaagagaaaaatcccgctatggaattggtttgcaggattatgcggggcgatggtcgtattttctgaaggcgcatccgccagcgcgctgggcgttgccacctttcaaacggcattaatttccgctctgcttctttccggcctgctgtgcgaccgttttggcatcggcgtggaggagaaaaaatattttacgccatggcgcattactggcgcattgttcgccgttatcgccaccatttttgtcgtgtcgccacagtggcactcaacctcgtttatcctgctcgctatcctcccctttttggcagggttactcgctggctggcagcctgcggggaatgcgaaagtcgccgaggcaacgggctccatgctggtgtccattacatggaacttcatcgtcggcttttgtgtcctgggcgcggcgctggcgatacgtatcgcattaggccatgtcaccatccagttacccgatacatggtggatgtatctcggtggtccgctgggtctgctatccatcgggctaatggcgattctggtgagaggcttagggctattgatgctgggcgtagcgtcaacagcaggccaactgcttggctcagtgctaattgatgaattgatcccttcattaggcaatacggtctacctggtcaccatcatcggtacgctgttcgcgttagtcggtgcgattgtgaccaccatccctgaatacagagcatccaaaatggcgcaaaaaatggaggtgtcagaatga +>3 +atgaaaggacgttgggcaaaatatgtggcgacaggcgtaatgctggcaatgctggctgcctgttcatcaaaaccgaccgatcgcggtcagcaatataaggacgggaaatttacccagcccttctccctggtaaaccagccggacgcggtgggcgcgccgattaatgctggtgactttgccgagcaggtcgaccagatccgttccgcgtcgccgcgtttatacaccaatcaaagcaacgtctataacgctgtccagaactggttacgttccggaggcgatacgcgcactatgcgtcagtttggtatcgatgcctggcagatggaaggcaccgacaactacggtaacgtccagttcacgggctactatacgcccgtcgttcaggctcgccatacgcgccagggcgcgttccagtaccctatctatagtatgccgccaaaacgcggacgcttaccgtcccgcgcccagatctacgcaggcgcgctgagcgataagtacatcctggcctggagtaattcgctgatggataactttattatggatgtccagggcagcggttatattgatttcggtgatggtagtccgctgaacttctttagttatgccggtaaaaatggctggccttatcgcagtattggtaaagtgcttatcgatcgcggtgaagtgaaaaaagaagatatgtcgatgcaggctatacgcgaatggggagagaagcacagcgaagcggaagtgcgtgaattactggagcagaacccgtcgttcgtcttctttaaaccgcaatcttttgcgccggtcaaaggcgccagcgctgtaccgctgattgggcgagcatccgtcgcctccgaccgcagcatcattccgccgggcacaaccttgttggcggaagtaccgttgctggataacaacggtaaatttagcggtcagtatgagctacgtttgatggtcgcactggatgtcggcggagcgattaaaggccagcactttgatatttaccaggggattggcccggatgccgggcatcgtg
ccggatggtataatcactatggtcgcgtctgggtgttaaaaagcgcgccgggcgccggtaacgtgtttagcggctga +>4 +ttgtttgaccgttatgatgccggtgagcaggcggtactggtacacatctatttttcgcaagacaaagatatggaagacctccaggagtttgaatctctggtctcttccgccggtgtcgaagcaatgcaggtgattaccggtagccgtaaagcaccgcacccgaagtactttgtaggtgaaggtaaggcagttgaaattgcggaagccgtgaaagcgactggcgcagcggtcgtattgtttgatcatgcattgagtccagcccaggagcgaaacctggagcgtttgtgcgagtgccgggttatcgatcgcaccggtcttatcttagatatttttgcccaacgtgcgcgtacccatgaaggtaagttgcaggttgagctggcgcagctacgccatctggctacgcgtctggtgcgtggctggacccaccttgaacgtcagaaaggcgggattggtttgcgcggtccgggtgaaacccagctcgaaaccgaccgtcgtttactgcgtaatcgcattgtgcagattcagtcgcgcctggagaaagttgagaagcaacgtgagcaggggcggcagtcgcgcatcaaggccgacgttccgacggtatcgctggtgggctataccaacgccggaaaatccaccctttttaatcagatcactgaagcgcgggtctatgcggcagatcagctatttgcgacgctggaccccacgttacgtcgtattgatgtagcggatgtcggtgaaaccgttctggcggatacggtaggctttatccgccatttaccgtacgatctggtggctgcctttaaagctaccctgcaggagacgcgtcaggcgacgctgctgctgcatgtggtcgatgcggcggatgttcgtgtgcaggaaaacatcgaggcggtaaacaccgttcttgaagagattgacgctcacgaaatccccaccttaatggtgatgaacaaaatcgatatgctggacgactttgaaccgcgtatcgacagagatgaagagaataaacccatccgcgtttggctttcagcgcaaagcggcgtgggaataccacagctttttcaggctttgacggagcgtctttccggcgaggtggcgcagcatacgctgcgtttgctgccgcaggaagggcgtctgagaagccggttttatcagcttcaggcaatagaaaaagagtggatggaggaagacggtagcgtcagtctgcaggtacgaatgccgattgtcgactggcgtcgcctctgtaaacaagaaccagcgttgatcgaatacgtgatctag +>5 
+gtggcgcaacgcattctggtcctcggcgccagcggctatatcggtcagcacctggtctttgcgctaagtcagcaaggacatcaggtgcgggcggcggcgcggcgcgtggaacgtctggaaaaacatcgcctcgccaacgtcagttgtcataaggtcgatctgcactggccggaaaatttacccgcgctgcttcgcgacattgataccgtttactatctggtacacggcatgggcgaaggcggcgactttatcgcccatgagcgtcaggcggcgctcaacgtgcgcgacgcgctgcgccagacgccggttaaacaacttattttcctcagttcattgcaggcgccggcgcatgagcaatccgatcacctgcgcgcccgccagcttacggctgacacgctgcgcgacgcaggcgtaccggtgacggaattacgcgccgggatcatcgtcggcgcaggctccgccgcctttgaggtcatgcgcgacatggtttacaacctgccaatactcacgccgccgcgctgggtgcgttcgcgcaccacgcccatcgccctggaaaatttactctactacctggtcggcttactggagcaccctgcgcacgagcatcgtattctggaagccgccgggccgcaggtattaagttatcagcagcagtttgaacgttttatggccgtcagcggtaaacggcgtccgctgatcccggtgccttttccgacccgctggatttcggtctggtttttaaacgtcattacctccgtgccgccaactaccgcaaaagcgttaatccagggattaaggcacgatttgctggccgatgacgccgcgttaaaaaagttgatcccccaaacgcttatcacctttgatgacgccgttcgccgcacgctgaaagaagaagaaaaactggtgaactccagcgactggggctacgacgcgctggccttcgcccgctggcgtcccgaatacggctattttccaaagcaggcgggctttaccgcgcagaccccggccagcctatcggcgctatggcaggtcgtaaatcggctgggtggcaaagagggctattttttcggcaatattttgtggcagacgcgcgccgcgatggaccgtctggtggggcataaactggcgaaaggccgcccgtcgcataccttgctcaagcctggcgatacggtagatagctggaaagtgatcattgtcgaaccagaaaaacagctcacgctcttgtttggcatgaaagcgccgggtctggggcggcttagcttcacgctgcacgataaaggccgctaccgcgaaattgacgtgcgcgcctggtggcatccacacggaatgccgggcctgatttactggctactgatgatcccggcgcacctgtttattttccggggaatggcaaggcgtattgcccgacttgcagaacaaatcacagaaaaatga +>6 
+atgaataaatttgctcgccattttctgccgctgtcgctgcgggttcgttttttgctggcgacagccggcgtcgtgctggtgctttctctggcatatggcatagtggcgctggtcggctatagcgtaagttttgataaaaccacgtttcgtctgctgcgcggcgaaagcaacctgttttataccctcgccaaatgggaaaataataaaatcagcgttgagctgcctgaaaatctggacatgcaaagcccgaccatgacgctgatttacgatgaaacgggcaaattattatggacgcagcgcaacatcccctggctgattaaaagcattcaaccggaatggttaaaaacgaacggcttccatgaaattgaaaccaacgtagacgccaccagcacgctgttgagcgaggaccattccgcgcaggaaaaactcaaagaagtacgtgaagatgacgatgatgcggagatgacccactcggtagcggtaaatatttatcctgccacgacgcggatgccgcagttaaccatcgtggtggtcgataccattccgatagagctaaaacgctcctatatggtgtggagctggttcgtatacgtgctggccgccaatttactgttagtcattcctttactgtggatcgccgcctggtggagcttacgccctatcgaggcgctggcgcgggaagtccgcgaacttgaagatcatcaccgcgaaatgctcaatccggagacgacgcgtgagctgaccagccttgtgcgcaaccttaatcaactgctcaaaagcgagcgcgaacgttataacaaataccgcacgaccctaaccgacctgacgcacagtttaaaaacgccgctcgcggttttgcagagtacgttacgctctttacgcaacgaaaagatgagcgtcagcaaagctgaaccggtgatgctggaacagatcagtcggatttcccagcagatcggctattatctgcatcgcgccagtatgcgcggtagcggcgtgttgttaagccgcgaactgcatcccgtcgcgccgttgttagataacctgatctccgcgctcaataaagtttatcagcgtaaaggggtgaatatcagtatggatatttcaccagagatcagttttgtcggcgagcaaaacgactttgtcgaagtaatgggcaacgtactggacaacgcttgtaaatattgtctggagtttgtcgagatttcggctcgccagaccgacgatcatttgcatattttcgtcgaagatgacggcccaggcataccccacagcaaacgttccctggtgtttgatcgcggtcagcgcgccgataccctacgaccaggacaaggcgtggggctggctgtcgcgcgcgagattacggaacaatacgccgggcagatcattgccagcgacagtctgctcggtggcgcccgtatggaggtcgtttttggccgacagcatcccacacagaaagaggaataa +>7 
+atgacaatccagaagcggttgctggaggccgttgaacaaaaactcttacggcccattgatgcccagtttgctttaaccgtcgccggtaacgacgatcccgccgtgacgctggcggcagcgctgcttagtcatgacgcaggtgaaggtcacgtgtgtctgccgttgtcgcgtttaacgttaacggaggaggcgcatcccttactggtcgcctggataagcgaaacggctacgccaatcgactggaaaaagcggttactggcgtctgcggcggttagctgcggcgatagccccgcgccgttaattctgtgcggcgatcgcctctacctgaatcgcatgtggtgcaacgagcgtacggttgcgcgcttttttaacgaggttaaccaggccatcgccgtagatgaagatcagttatcccgtattctggacgcgctattccctccgacagacgaggtgaactggcaaaaggtggccgccgccgtagcgctaactcgccgtatctccgtgatttcaggcggtcccggcaccggtaaaaccaccaccgtcgcgaagctgctggcggcattaattcaaatggcggatggcgaacgttgccgtatccggctggcggcgccaaccgggaaagcggccgcacgcctgacggagtcgctcggcgcggcgttgcgtcagcttcctcttaccgatgcgcagaaaaagcgtataccggaggacgccagtacgctgcaccgactgctgggcgcacagcccggcagccagcgattacgccatcatgcgggcaacccgctgcatctggacgtgctggtggtcgatgaggcatcaatgattgatttgccgatgatgtcacgtttgattgacgctctgccgccgcacgggcgggtcatttttcttggcgatcgcgatcagttggcatccgttgaggcgggcgctgtgttgggcgatatttgcgcctatgtcaacgcagggtttacggcggaacgcgcccgacagctaagtcgactcactggtagcgccattccggcgggggccggaacgcaagccgcgtctttacgcgatagcctctgtttgttgcaaaagagctaccgtttcggcagcgattctggcatcggcaagctggcggcggcaattaactgtggcgacaggtcggcaatccaggctgtttttcagcaggggtttagcgatatagagaagcgtacattgcaaagcagcgacgattatgccgggatgctggatgaagcgctggcgggctatgggcgctacctgcggttgctgcatgaaaaagcggcgccggaggcaatccttcaggctttcaatgagtatcaactgctttgtgcgctgcgcgaaggcccatttggcgtgaggggactgaatgaccgcattgagcaggcgatggtgcagcaacgaaaaattcagcggcatccgcactctcgctggtatgaagggcgtccggtaatgattgcgcgcaacgatagcgcgttggggctatttaacggcgatattggtattgcgctcgatcgcgggcaggggttacgcgtctggtttgtgatgccggacggcacgatcaagtccgttcagcccagccgtctgccggaacatgatacaacctgggcgatgacggtacataaatcacaggggtctgaatttgatcacgccgcgctgattttacccagccaacgttcgccggtcgtgacgcgggagctggtgtataccgctgttacgcgggcgcggcggcggttatcgctgtatgccgatgaacggatcctggcaggcgcgattgtgacgcgaacggaacggcgcagcgggttagccaccttatttgatgaagtcagtcgtatcggataa +>8 
+atgcaggaggttgcaatgagctcccaggaagccagcaagatgctgcgcacttacaatattgcctggtggggcaataactactacgacgtcaacgagctgggccatattagcgtatgccccgatcccgacgtaccggaagcgcgtgtcgatcttgccaaactggtgaaagcgcgcgaagcgcaaggtcagcgtctgccggcgctgttctgcttcccgcagatcctgcaacaccgtttgcgttcaattaacgcggcgtttaagcgtgcgcgtgaatcttacggttataacggcgactacttcctcgtttatccgattaaggtcaatcagcatcgccgcgtgatcgagtcccttatccattccggtgaaccgttggggctggaagcggggtcgaaagcggaattgatggcggtgctggcgcatgccggcatgacccgtagcgtgatcgtctgtaatggttataaagaccgtgaatatattcggctggcgctgattggcgagaagatgggccataaggtttatctggtcatcgaaaagatgtctgaaatcgcgattgtgctggaagaggccgagcgcctgaacgtggttcctcgcctgggcgtgcgtgcgcgtctggcctctcaggggtccggtaagtggcaatcctccggcggcgaaaaatcaaaattcggcctggcggcgacgcaggtgctgcaactggtggagaccctgcgtgacgctgggcgtctggacagtctgcaactgttgcacttccacctgggatcgcagatggcgaacattcgcgatatcgcgaccggcgtgcgcgagtccgcgcgtttctatgttgagctgcataagctgggcgttaatatccagtgcttcgacgtgggcggcggtctgggcgtggattatgaaggtacccgctcgcagtccgactgttcggtgaactatggcctgaacgaatatgccaataacatcatctgggcgattggcgatgcctgcgaagagcatggtttaccgcatccgacggtgattaccgagtctgggcgcgccgtcactgcgcaccatacggtactggtctctaacattatcggcgtggagcgtaacgaatacacggatccgaccgcccctgctgaagatgcgccgcgcgcgctgcaaaatctgtgggaaacctggcaggagatgcataagcctggcacccgccgctcgctgcgtgaatggctgcatgatagccaaatggatctgcacgatattcatatcggctactcttccggcgcgtttagtttgcaggagcgcgcctgggcggagcaactttatctcagcatgtgccatgaagtgcagaagcaactggacccgcaaaaccgtgcgcatcgcccgattatcgacgaactgcaagagcggatggcggacaaaatgtacgtcaacttctcgctgttccagtcgatgccggacgcgtggggaatcgatcagctctttccggtgctgccgctggaagggttagatcaggtaccggaacgtcgtgccgtgctgctggatattacctgtgattccgatggcgctatcgaccactatatcgatggcgacggtatcgccacgacgatgccgatgccggaatacgatccagagaatccgccgatgctcggcttctttatggtcggcgcctatcaggagatcctcggtaacatgcacaacctgtttggcgatactgaagcggttgacgtgtttgtcttcccggatggtagtgtcgaggttgagttgtcggacgaaggcgataccgttgcggatatgctgcaatatgtgcaactggatccgaaaacgctattaacgcatttccgcgaccaggtaaaacagacggatctggacgatgcgttgcagcagcagttccttgaggaatttgaagcgggactgtacggttatacttatcttgaagatgagtag +>9 
+atgaattctctaccacaacggtcaaccgattttgaactgacaacatcacaggatggttttgcgcttagctggcaacagcgcctgattttacgccacagcgccgaaaacccctgtctgtggattggcgcgggcgttgccgacattgacatgtttcgcggcaacttcagcatcaaagacaaacttaacgagaagattgcattaacggaggccaccgtcagcgagctacccgacggctggctggtacaattcagccgtggcgcaacaattagcgccacccttcgcatctccgccgatgaggcgggacgcctgacgttggatctgcaaaacgacgacctgcaccataaccgtatctggttacgcctcgcagctaatccagacgaccatatctacggctgcggcgaacagttctcttatttcgatttgcgcggcaagccgttcccgctgtggaccagcgaacagggcgttggccgtaataaaaccagctatgtcacctggcaggcagactgtaaagagaacgccggcggcgactattactggaccttcttcccgcaaccgacctttgtcagcacgcagaagtattactgccacgtcgataatagctgctatatgaatttcgacttcagcgcgccggagtatcacgaactggcgctgtgggaagataaaactacgctgcgttttgagtgtgccgacacctacatcgccctgctggaaaaactgactgcgctgttaggtcgccagccggagctgccggactgggtttacgacggcgtcacgctcggcattcagggcggtacggaagtttgtcagcaaaaactggataccatgcgcaacgcaggcgtaaaagtgaacggtatttgggcgcaggactggtccggtatccgcatgacctcctttggcaagcgcgtgatgtggaactggaagtggaatagcgacaactatccacagctggatagccggatcaaacagtggaaagaagaaggcgtccagttcctctcttatatcaacccatacgtcgccagtgataaagacctctgcgctgaggcggcgagacacggctatctggcgaaagacgccacgggcggcgactatctggtcgagtttggcgaattctatggcggcgtggtcgatctgactaatcctgaagcttacgactggttcaaagatgtcatcaaaaagaacatgatcgcgctcggctgcagcggctggatggcggatttcggcgaatatctgccgaccgacacgtatctgcacaacggcgtcagcgcagagatcatgcataacgcctggcccgcactgtgggcgaagtgtaactacgaagcgctacaggagaccggcaagctcggcgagatcctgttctttatgcgtgcgggttacaccggcagtcagaaatattccaccatgatgtgggcaggcgaccagaacgttgactggagtcttgatgatggtctggcctctgtcgtgcctgctgcattgtcgctggcgatgaccggccatggtctgcatcacagcgatatcggcggctacaccaccctgtttgacatgaagcgcagcaaagagttgctgctgcgctggtgcgatttcagcgcctttacgccgatgatgcgcacccatgaaggcaaccgccccggcaataactggcagttcgacggcgacgcggaaactattgcccactttgcccgcatgaccaccgtctttaccacgctgaaaccgtatctcaagcaggcggtggcgcaaaacgcggctaccggtctgccggtcatgcgtccgctattcctgcactacgagaacgatgccgctacctacaccctgaaatatcaatatctgctcggtcaggatctgctggtcgcgccggttcacgagcaggggcgttgcgattggacgctgtacctgccggaagatcactgggtgaatatctggaccggcgaagctcaccacggcggtgaaattaccgtggatgcgccgattggcaagccgccggtcttctatcgcgcgaagagcg
agtgggctttactttttgcttctttacggaatatctaa +>10 +atgaaactttttctgacaacggcagcgctaactgcgaccttgatttccggcatggcatttgccagcgatcccgtcatcccctgggcgactaacagcggcggtacggaaagcacgcatattgcggcaatgggcgaggatttgaatgcgcagcatcagcagatcacccacacgcatgaaggcgtatgggcggccaattccggcagcattcaggctgatgaagccgcgctgaccagcaacaagccgccggtacaaggccacccggaattgatgccgcatcagggctaa +>11 +atgaaacatttacgcgtggtggcctgcatgatcatgctggcgctggcgggatgcgataacaacgataaaaccgccccgacgactaaaagcgaagcgccagccgtagcgcagccctcgcccgcgcaagacccgtcacagctacaaaagctggcgcagcaaagccagggcaaagcgctcacgctattagacgcctccgaagcgcagctcgacggcgcggcgacgctggtgctgacgttttcaattcctttagatcctgaacaggatttctcccgtgtggttcacgtggttgataagaaaagcggcagcgttgacggcgcatgggagctggcgccaaatttaaaagagctaaggttacgtcatctggaacctgagcgcgtgctggtggtcacggttgatcccgccgttaaagcgctgaataacgccaccttcggcaagtcttatgaaaaaacgattaccacgcgtgatgtccagcccagcgtcggctttgccagccggggatcgctgctaccggggaaaatagcggaaggactgccggtcatggcgcttaacgtcaaccacgttgatgtgaactttttccgcgttaagcccggatcgctggcgtcgtttgtcagccagtgggagtaccgtagttccctctctaactgggaatccgacaatctgctgaaaatggcggatctggtttataccggtcgttttgatcttaatccggcgcgtaatacgcgtgagaaactgctgctgccattaagcgatattaagccgctgcaacaggcgggcgtatatgtggcggtaatgaatcaggctggacactacaactatagtaatgccgctacgttgtttacccttagcgatatcggcgtgtccgcgcaccgttaccatagtcggctggatatctttacgcaaagcctggaaaacggcgcggcgcagtcgggaattgagatcgttcttctgaatgataaagggcagacgctggcgcaggcgacgagcgacgcgcagggacatgtgcaactggaggctgataaagcggcggcgctattactggcgcgtaaagaggggcagaccacgctgctcgatctcacgcttccggctctggatctgtcggagtttaatgtcgctggcgcgcccggctacagcaagcagttctttatgttcggcccacgcgatctctaccgaccgggcgaaacggttatcctcaacggattactgcgcgatagcgacggtaaaacgctgcccgatcaacccgttaagctggaagtggtaaaaccagacggacaagtgatgcgtaccgtcgtcagccagccggaaaacggactataccgtttgaattatccgctggatatcaacgcgccgaccggcttgtggcatgtccgcgccaacaccggcgataatttgctgcggagttgggatttccacgtggaagactttatgccggagcggatggcgctcaacctgacggcgcaaaaaacaccgctggcgcctgcggatgaggtgaaattctccgttgtcggctattacttgtatggcgcccctgctaacggcaataccctgcaagggcaacttttcctgcgtccgctgcgcgacgctgtcgcggcgttgcctggcttccagttcggcaatattgccgaagagaatctttcgcgcagcctggacgaagttcaactgacgt
tggataaaggcggacgtggtgaagtgagtgctgccagccagtggcaagaagcgcattcgccgttgcaggtaattctacaggccagcctgctggagtctggcggtcgtccggtcactcgtcgcgtagagcaggcgatttggcctgccgatacgttaccgggaattcgtccacagttcgccgccaaagcggtatacgactaccgtacggataccaccgttaatcaaccgattgtcgacgaagacagcaacgccgcattcgatattgtttacgccaacgcgcagggcgagaaaaaagcggtgtccggtttacaggtgcggctcatccgcgagcgtcgcgactattactggaactggtcggaaagcgaaggctggcagtcgcagtttgatcaaaaagatctggtggagggcgagcagacgctggatctgaacgcggatgaaaccggaaaagtcagcttcccggtggaatggggcgcgtaccgtctggaggtcaaagcgccgaatgagacggtcagcagcgttcgtttctgggccggctatagctggcaggataacagcgacggtagcggcgcggcgcgtccggatcgcgtcaccctcaaactggataaagcgaattatcgtccaggcgacaccatgaaattgcatatcgccgcgccggtcgccggtaaaggttatgccatggtggagtccagcgatggtccgctgtggtggcaggcgatcgacgtgccggcgcaggggctggagctcacgattccggtggataaaacctggaatcgccacgatctctatctcagtacgctggtggtgcgtcccggcgataaatctcgttccgcgacgccaaaacgcgccgtggggttactacatctaccgctgggggatgacaaccgccgcctcgatctggcgctggaaagcccggctaaaatgcgcccgaatcagccgctcaccgtcagggtgaaagccagcgttaaacacggcgaaatgccaaaacagatcaacgtgctggtctccgcggtcgatagcggtgtattgaatatcaccgattacgcgacgccggacccgtggcaggcgttcttcggtcaaaaacgctacggtgcggatatctacgatatttacggccaggtcattgaagggcaggggcggctggcggcgttgcgttttggcggcgatggcgacgaccttacgcgcggcggaaaaccgccggtaaaccatgccaatatcatcgcgcagcaggcgcagccgatcacgctcaatgagcagggcgaaggggtcgtaacgctgccgattggcgactttaacggcgaactgcgggttatggcgcaggcatggacagcggacgattttggtcgcggcgaaagcaaagtcgttgtcgccgcgccagtgattgccgagctgaatatgccgcgttttctggcgggaggggatgtttcgcgactggtgctggacgtcaccaatctgaccgaccgcccgcagacgctgaatattgcgctcgccgccagtgggttactggaactgcttagtcaacagccgcaaccggtcaacctggcgccgggcgtgcgcaccaccttattcgttccggtacgcgcgctggaaggttttggcgaaggcgaaatccaggcgaccattagcggtctgaatctgccgggagaaaccctcggcgcgcagcataagcagtggcaaatcggcgtgcgtccggcctggcctgcccaaacggtaaatagcggcattgcgctggcgccgggagagagctggcatgtaccagagcagcatctggcaaacgtctcgccagccacgttacagggacaactgctgttaagcggaaaaccgccgctcaatctggcgcgctacattcgcgagctgaaagcatatccgtacgggtgcctggaacaaaccaccagcgggttattcccggcgctgtataccaatgccgctcaattgcagtcgctcggtattaccggcgatagcgatgaaaaacggcgcgccgcggtggatatcggcatctcccgc
atactacagatgcagcgtgataacggcggttttgcgctatgggatgaaaatggggcggaagagccctggctaacggcctacgcgatggatttcctcattcgcgcgggcgagcagggatatagcgtcccgccggaggccattaaccggggcaatgagcgactgctgcgctatctgcaggatcccggtacgatgctgattcgttatagcgataatacccaggccagtacttttgccgctcaggcttacgccgcgctggtactggcgcgtcagcagaaagcgccgctcggcgcgctgcgcgaaatctgggagcgccgtagtcaggcggcttcaggactgccgctgatgcaattgggcatcgcgttaaacacgatgggtgatgccagacgcggcgaagaggccattacgctggctctgaatacgccgcgtcaggacgaacggcaatggatagcggattacggcagttctctgcgcgataacgctctgatgttgtcgttactggaagagaacaacctcagaccggacgcgcaaaacgcgctattaagctcgctttctgagcaggccttcggtcagcgctggctctctacccaggagaacaatgccttgttcctcgccgcgcattcgcgacaggccagcgcgggcgcctggcaggcgcagacctcgttagaggcgcagccgctgtcgggcgacaaggcgctgacccgtaatctggatgctgatcagctggccgcccttgaggtgacgaacaccggtagccagccgctatggctgcgtctggatagcagcggctatccctcatctgcgcctgagcctgccagcaacgttttgcagattgaacgacaaatactggggaccgatggtcagcgcaaatcgctgtcctcgttgcgtagcggcgaactggtgctggtctggttaacggtagtggccgatcgcaatgtgccggatgcgctggtggtggacctgctcccggccgggctggagctggaaaaccagaatctggctgacagcagcgccagcctgccggagagcggtagcgaagtgcaaaatctgcttaatcagatgcagcaggcggatattcagtatatggaattccgcgacgatcggtttgtggctgccgtcgttgtcaatgagggccagcccgtgacgctggtctacctggcgcgcgcggtaacgccggggacgtaccagcttgcgcaaccgcaggtggaatcgatgtacgcgcctcagtggcgggcaaccggcgcgagcgagggactgctgattgtgacgccttaa +>12 +atggcaaagaaccgtagtcgtcgtctgcgtaaaaaaatgcacattgacgaattccaggaattaggattttcggtggcatggcgttttccggaaggtacatctgaagagcagatcgataagactgtcgatgactttattaatgacgttattgagcctaataaactggcgtttgacggcagcggctacctggcctgggaagggctgatttgtatgcaggaaatcggtaaatgtaccgaagaacatcaggcgattgtgcgtaagtggctggaagcgcgcaaccttgaagaagtccgaaccagcgaacttttcgacgtttggtgggactaa +>13 
+ctgaaactaatacctttttatttgctcgcgctattttctgccgcctccggggctacggagataaacgcctgcaaagacctgatcggaacatggaaaaccactgcagataacccgccttataccatgacaatattgccgccagtagaaggctgcggggaaaaatgtgtgaaactgaacgtacagtatgaacttgacgtgactcaccgcaacgcgctttattgccatgaaggacaagagggggtaaaagggcagggacccatggtgatagcatttgaaggggcgtatggtggacacgctattggaacctataaccgacagctgcaattactttgggctggcgtaataccgaaaaataaaaaggggaaatggataacgaaaatggaaaattactggttcaggcaggtaaaagcgcactaa +>14 +atgaaaaaattttatagctgtcttcctgtctttttactgatcggctgtgcccaggtgcccctcccttcctccgtgagcaaaccggtacagcaacctggcgctcagaaagagcaactggccaacgcaaatagtattgatgagtgtcagtctcttccgtatgtgccgtcagaccttgcgaagaataaatcattatcaaaccagaacgctgataattccgcatcaaaaaatagcgcaatcagctcaagcattttttgcgaaaaatataaacaaaccaaagaacaggcgcttaccttcttccaggaacatccacaatacatgcgttcgaaagaagatgaagagcaactcatgaccgaatttaaaaaagttctacttgaacccggaagtaagaatttaagcatatatcagacgttacttgctgcccatgaaagactgcaagccttataa +>15 +atgcgaattaaacctgacgataactggcgctggtattatgatgaagagcacgatcgtatgatgctcgatctcgccaatggtatgctttttcgctcgcgcttctctcgtaaaatgcttacgccggacgcgttttgcccgaccggtttttgtgtggatgatgccgcgctctatttttcctttgaagaaaaatgccgggatttcgaattaaccaaagagcaacgcgcggagctggtattgaatgcgctggtggcgattcgctatctcaaaccgcagatgccgaaaagctggcattttgtggcgcatggcgaaatgtggaccccgggcacgggtgatgccgcaagcgtctggttaagcgatacggcggaacaggttaatttgctggtcgttgagccgggagaaaatgccgcgttgtgtctgctggcacagccaggcgtggtgatagcaggccgaaccatgcagcttggcgatgcgattaaaattatgaacgacaggctgaaaccgcaggttcactgtcacagtttcagccttgaacaagcggtttaa +>16 
+gtgctggcatttaccctacgctttattaaaaacaaacgctatttcgcgattctggcgggggcgttggttattatcgctgggttggcctcacaacatgcctggagcggtaacggtctaccgcagattaacggtaaggcgctggcggcgctggcgaagcagcatccggtagtcgtgttattccgccatgccgagcggtgcgatcgttccgataatacctgcctgtcagacagcacggggattacggtcaacggtgcgcaagatgccagagcgctgggtaaagccttcagtgccgatatacaaaattacaatctctattccagtaatacggtgcgcaccatccagtcggcaacctggttttccgcaggcaggtcgcttacggcggataaaaaaatgatggattgcggcagcggtatttatgcttcaatcaatacgttactgaaaaagtcgcagaataaaaatatagtcatttttacccataatcattgtttgacgtatattgcaaaaaataagcgcggcgtgaagtttgatccggactatcttaatgcgctcgttatgtatgcggaaaacggaaaactctttttagacggtgaatttgttcccggctag +>17 +atgggcagtaattatatcgtcatcgagggcctggaaggcgccggaaaaaccactgcgcgcgacgtggtggtggagacccttgagcaactgggtattcgtaacatgatttttacccgtgagccgggcggtacgcagcttgccgaaaaactaagaagtctggtgctggatatccgatcggtaggcgacgaagtgattaccgataaagcggaagtgctgatgttttatgccgctcgcgtacagctcgtcgaaacggtaatcaaacccgcgctggcgcaaggcgtatgggtgatcggcgatcgccacgatctctcgacccaggcgtatcaggggggagggcgcggcattgatcaaaccatgctggcgacactacgcgatgccgtgctgggcgattttcgtcctgacctgacgctgtatctggatgtcacgccggaagtcgggctaaaacgcgccagagcgcgcggcgatttggatcgcattgagcaggaatcttttgatttctttaaccggactcgcgcgcgctatctggaactggcggcgcaagactcgcgtatccgcaccattgacgcgacccagccgctggatgccgtcatgcgcgacattcgcgccacggtgacgaagtgggtgcaggagcaagcggcatga +>18 +atgaaacacataaagaaatccgtgttggtcgtattactgacaagccatgttgcccatgccagcattgttgttggcggaacccgactggttttcgacggcaataatgatgagtcgtcaatcaatgtagagaataaagacagcaaagcgaatcttgttcagtcatggttatcggttgccgatccccaggtcacaaataagcaggtgtttattatcaccccgcctctttttcgccttgatgccgggcaaaagaacagtattcgggttattcgctctggcgcccctctacctgcagatcgggagtctatgtactggctcaatattaagggtattccttctattgatgataatgcctccgcgaaccgcgtggaaatctccattaatacccaaataaagcttatttatcgaccgccagcattaaccaaatcaacgcctgatagccagagccaacaattaaaatggcagacggcaggggatgttattaccgtaaataaccctaccccctattatatgaactttgccagtgtgacgcttaacagccatgaagtgaaatcagcgaccttcgttccgccaaaatcatcagcgtcatttaaattgagttcgactgccgcccctcatggtacggtaacgtggcgattaatcagcgattatgggatgagtttagagccgcattccggttcattctag diff --git 
a/tests/test_data/outputs/search/G9/protein/hsps.txt b/tests/test_data/outputs/search/G9/protein/hsps.txt new file mode 100755 index 0000000..4a5a697 --- /dev/null +++ b/tests/test_data/outputs/search/G9/protein/hsps.txt @@ -0,0 +1,19 @@ +0 0 34 34 1 34 1 34 34 0 100.000 100 100 N/A 4.94e-20 64.3 +1 10 286 286 1 286 1 286 286 17 94.056 100 100 N/A 0.0 518 +2 11 324 324 1 324 1 324 324 0 100.000 100 100 N/A 0.0 640 +3 12 366 366 1 366 1 366 366 0 100.000 100 100 N/A 0.0 754 +4 13 427 427 1 427 1 427 427 9 97.892 100 100 N/A 0.0 846 +5 14 478 478 1 478 1 478 478 0 100.000 100 100 N/A 0.0 972 +6 15 488 488 1 488 1 488 488 14 97.131 100 100 N/A 0.0 972 +7 16 612 612 1 612 1 612 612 0 100.000 100 100 N/A 0.0 1241 +8 17 638 638 1 638 1 638 638 0 100.000 100 100 N/A 0.0 1326 +9 18 679 679 1 679 1 679 679 8 98.822 100 100 N/A 0.0 1403 +10 1 95 95 1 95 1 95 95 11 88.421 100 100 N/A 2.89e-51 147 +11 19 1645 1645 1 1645 1 1645 1645 0 100.000 100 100 N/A 0.0 3332 +12 2 109 109 1 109 1 109 109 0 100.000 100 100 N/A 3.09e-81 224 +13 3 139 139 1 139 1 139 139 9 93.525 100 100 N/A 3.70e-98 269 +14 4 148 148 1 148 1 148 148 11 92.568 100 100 N/A 1.56e-97 268 +15 5 181 181 1 181 1 181 181 0 100.000 100 100 N/A 1.79e-140 380 +16 6 202 202 1 202 1 202 202 14 93.069 100 100 N/A 2.36e-141 384 +17 7 214 214 1 214 1 214 214 0 100.000 100 100 N/A 1.75e-157 426 +18 8 228 228 1 228 1 228 228 0 100.000 100 100 N/A 6.32e-172 463 diff --git a/tests/test_data/outputs/search/G9/protein/queries.fasta b/tests/test_data/outputs/search/G9/protein/queries.fasta new file mode 100755 index 0000000..26b5df4 --- /dev/null +++ b/tests/test_data/outputs/search/G9/protein/queries.fasta @@ -0,0 +1,38 @@ +>0 +MYDPPFLEALMITASFFAIFIIIVVSVLLLEGD* +>1 
+VNLVKTTGLHAAGKGINVAKVLKDLGIDVTVGGFLGKDNQDGFQQLFSELGIANRFQVVQGRTRINVKLTEKDGEVTDFNFSGFDVTPADWERFVNDSLSWLGQFDMVCVSGSLPAGVSPEAFTDWMTRLRSQCPCIIFDSSREALVAGLKAAPWLVKPNRRELEIWAGRKLPEMKDVIDAAHALREQGIAHVVISLGAEGALWVNASGEWIAKPPAVDVVSTVGAGDSMVGGLIYGLLMRESSEHTLRLATAVAALAVSQSNVGITDRPQLAAMMARVDLQPFN* +>2 +MNKNKYSTPLLMLATILAGMLSPMQSAVNGQLGHWLQDGNACAVISFASGLVVMFFIIIARKETRQQFASIPTLIKKRKIPLWNWFAGLCGAMVVFSEGASASALGVATFQTALISALLLSGLLCDRFGIGVEEKKYFTPWRITGALFAVIATIFVVSPQWHSTSFILLAILPFLAGLLAGWQPAGNAKVAEATGSMLVSITWNFIVGFCVLGAALAIRIALGHVTIQLPDTWWMYLGGPLGLLSIGLMAILVRGLGLLMLGVASTAGQLLGSVLIDELIPSLGNTVYLVTIIGTLFALVGAIVTTIPEYRASKMAQKMEVSE* +>3 +MKGRWAKYVATGVMLAMLAACSSKPTDRGQQYKDGKFTQPFSLVNQPDAVGAPINAGDFAEQVDQIRSASPRLYTNQSNVYNAVQNWLRSGGDTRTMRQFGIDAWQMEGTDNYGNVQFTGYYTPVVQARHTRQGAFQYPIYSMPPKRGRLPSRAQIYAGALSDKYILAWSNSLMDNFIMDVQGSGYIDFGDGSPLNFFSYAGKNGWPYRSIGKVLIDRGEVKKEDMSMQAIREWGEKHSEAEVRELLEQNPSFVFFKPQSFAPVKGASAVPLIGRASVASDRSIIPPGTTLLAEVPLLDNNGKFSGQYELRLMVALDVGGAIKGQHFDIYQGIGPDAGHRAGWYNHYGRVWVLKSAPGAGNVFSG* +>4 +LFDRYDAGEQAVLVHIYFSQDKDMEDLQEFESLVSSAGVEAMQVITGSRKAPHPKYFVGEGKAVEIAEAVKATGAAVVLFDHALSPAQERNLERLCECRVIDRTGLILDIFAQRARTHEGKLQVELAQLRHLATRLVRGWTHLERQKGGIGLRGPGETQLETDRRLLRNRIVQIQSRLEKVEKQREQGRQSRIKADVPTVSLVGYTNAGKSTLFNQITEARVYAADQLFATLDPTLRRIDVADVGETVLADTVGFIRHLPYDLVAAFKATLQETRQATLLLHVVDAADVRVQENIEAVNTVLEEIDAHEIPTLMVMNKIDMLDDFEPRIDRDEENKPIRVWLSAQSGVGIPQLFQALTERLSGEVAQHTLRLLPQEGRLRSRFYQLQAIEKEWMEEDGSVSLQVRMPIVDWRRLCKQEPALIEYVI* +>5 +VAQRILVLGASGYIGQHLVFALSQQGHQVRAAARRVERLEKHRLANVSCHKVDLHWPENLPALLRDIDTVYYLVHGMGEGGDFIAHERQAALNVRDALRQTPVKQLIFLSSLQAPAHEQSDHLRARQLTADTLRDAGVPVTELRAGIIVGAGSAAFEVMRDMVYNLPILTPPRWVRSRTTPIALENLLYYLVGLLEHPAHEHRILEAAGPQVLSYQQQFERFMAVSGKRRPLIPVPFPTRWISVWFLNVITSVPPTTAKALIQGLRHDLLADDAALKKLIPQTLITFDDAVRRTLKEEEKLVNSSDWGYDALAFARWRPEYGYFPKQAGFTAQTPASLSALWQVVNRLGGKEGYFFGNILWQTRAAMDRLVGHKLAKGRPSHTLLKPGDTVDSWKVIIVEPEKQLTLLFGMKAPGLGRLSFTLHDKGRYREIDVRAWWHPHGMPGLIYWLLMIPAHLFIFRGMARRIARLAEQITEK* +>6 
+MNKFARHFLPLSLRVRFLLATAGVVLVLSLAYGIVALVGYSVSFDKTTFRLLRGESNLFYTLAKWENNKISVELPENLDMQSPTMTLIYDETGKLLWTQRNIPWLIKSIQPEWLKTNGFHEIETNVDATSTLLSEDHSAQEKLKEVREDDDDAEMTHSVAVNIYPATTRMPQLTIVVVDTIPIELKRSYMVWSWFVYVLAANLLLVIPLLWIAAWWSLRPIEALAREVRELEDHHREMLNPETTRELTSLVRNLNQLLKSERERYNKYRTTLTDLTHSLKTPLAVLQSTLRSLRNEKMSVSKAEPVMLEQISRISQQIGYYLHRASMRGSGVLLSRELHPVAPLLDNLISALNKVYQRKGVNISMDISPEISFVGEQNDFVEVMGNVLDNACKYCLEFVEISARQTDDHLHIFVEDDGPGIPHSKRSLVFDRGQRADTLRPGQGVGLAVAREITEQYAGQIIASDSLLGGARMEVVFGRQHPTQKEE* +>7 +MTIQKRLLEAVEQKLLRPIDAQFALTVAGNDDPAVTLAAALLSHDAGEGHVCLPLSRLTLTEEAHPLLVAWISETATPIDWKKRLLASAAVSCGDSPAPLILCGDRLYLNRMWCNERTVARFFNEVNQAIAVDEDQLSRILDALFPPTDEVNWQKVAAAVALTRRISVISGGPGTGKTTTVAKLLAALIQMADGERCRIRLAAPTGKAAARLTESLGAALRQLPLTDAQKKRIPEDASTLHRLLGAQPGSQRLRHHAGNPLHLDVLVVDEASMIDLPMMSRLIDALPPHGRVIFLGDRDQLASVEAGAVLGDICAYVNAGFTAERARQLSRLTGSAIPAGAGTQAASLRDSLCLLQKSYRFGSDSGIGKLAAAINCGDRSAIQAVFQQGFSDIEKRTLQSSDDYAGMLDEALAGYGRYLRLLHEKAAPEAILQAFNEYQLLCALREGPFGVRGLNDRIEQAMVQQRKIQRHPHSRWYEGRPVMIARNDSALGLFNGDIGIALDRGQGLRVWFVMPDGTIKSVQPSRLPEHDTTWAMTVHKSQGSEFDHAALILPSQRSPVVTRELVYTAVTRARRRLSLYADERILAGAIVTRTERRSGLATLFDEVSRIG* +>8 +MQEVAMSSQEASKMLRTYNIAWWGNNYYDVNELGHISVCPDPDVPEARVDLAKLVKAREAQGQRLPALFCFPQILQHRLRSINAAFKRARESYGYNGDYFLVYPIKVNQHRRVIESLIHSGEPLGLEAGSKAELMAVLAHAGMTRSVIVCNGYKDREYIRLALIGEKMGHKVYLVIEKMSEIAIVLEEAERLNVVPRLGVRARLASQGSGKWQSSGGEKSKFGLAATQVLQLVETLRDAGRLDSLQLLHFHLGSQMANIRDIATGVRESARFYVELHKLGVNIQCFDVGGGLGVDYEGTRSQSDCSVNYGLNEYANNIIWAIGDACEEHGLPHPTVITESGRAVTAHHTVLVSNIIGVERNEYTDPTAPAEDAPRALQNLWETWQEMHKPGTRRSLREWLHDSQMDLHDIHIGYSSGAFSLQERAWAEQLYLSMCHEVQKQLDPQNRAHRPIIDELQERMADKMYVNFSLFQSMPDAWGIDQLFPVLPLEGLDQVPERRAVLLDITCDSDGAIDHYIDGDGIATTMPMPEYDPENPPMLGFFMVGAYQEILGNMHNLFGDTEAVDVFVFPDGSVEVELSDEGDTVADMLQYVQLDPKTLLTHFRDQVKQTDLDDALQQQFLEEFEAGLYGYTYLEDE* +>9 
+MNSLPQRSTDFELTTSQDGFALSWQQRLILRHSAENPCLWIGAGVADIDMFRGNFSIKDKLNEKIALTEATVSELPDGWLVQFSRGATISATLRISADEAGRLTLDLQNDDLHHNRIWLRLAANPDDHIYGCGEQFSYFDLRGKPFPLWTSEQGVGRNKTSYVTWQADCKENAGGDYYWTFFPQPTFVSTQKYYCHVDNSCYMNFDFSAPEYHELALWEDKTTLRFECADTYIALLEKLTALLGRQPELPDWVYDGVTLGIQGGTEVCQQKLDTMRNAGVKVNGIWAQDWSGIRMTSFGKRVMWNWKWNSDNYPQLDSRIKQWKEEGVQFLSYINPYVASDKDLCAEAARHGYLAKDATGGDYLVEFGEFYGGVVDLTNPEAYDWFKDVIKKNMIALGCSGWMADFGEYLPTDTYLHNGVSAEIMHNAWPALWAKCNYEALQETGKLGEILFFMRAGYTGSQKYSTMMWAGDQNVDWSLDDGLASVVPAALSLAMTGHGLHHSDIGGYTTLFDMKRSKELLLRWCDFSAFTPMMRTHEGNRPGNNWQFDGDAETIAHFARMTTVFTTLKPYLKQAVAQNAATGLPVMRPLFLHYENDAATYTLKYQYLLGQDLLVAPVHEQGRCDWTLYLPEDHWVNIWTGEAHHGGEITVDAPIGKPPVFYRAKSEWALLFASLRNI* +>10 +MKLFLTTAALTATLISGMAFASDPVIPWATNSGGTESTHIAAMGEDLNAQHQQITHTHEGVWAANSGSIQADEAALTSNKPPVQGHPELMPHQG* +>11 +MKHLRVVACMIMLALAGCDNNDKTAPTTKSEAPAVAQPSPAQDPSQLQKLAQQSQGKALTLLDASEAQLDGAATLVLTFSIPLDPEQDFSRVVHVVDKKSGSVDGAWELAPNLKELRLRHLEPERVLVVTVDPAVKALNNATFGKSYEKTITTRDVQPSVGFASRGSLLPGKIAEGLPVMALNVNHVDVNFFRVKPGSLASFVSQWEYRSSLSNWESDNLLKMADLVYTGRFDLNPARNTREKLLLPLSDIKPLQQAGVYVAVMNQAGHYNYSNAATLFTLSDIGVSAHRYHSRLDIFTQSLENGAAQSGIEIVLLNDKGQTLAQATSDAQGHVQLEADKAAALLLARKEGQTTLLDLTLPALDLSEFNVAGAPGYSKQFFMFGPRDLYRPGETVILNGLLRDSDGKTLPDQPVKLEVVKPDGQVMRTVVSQPENGLYRLNYPLDINAPTGLWHVRANTGDNLLRSWDFHVEDFMPERMALNLTAQKTPLAPADEVKFSVVGYYLYGAPANGNTLQGQLFLRPLRDAVAALPGFQFGNIAEENLSRSLDEVQLTLDKGGRGEVSAASQWQEAHSPLQVILQASLLESGGRPVTRRVEQAIWPADTLPGIRPQFAAKAVYDYRTDTTVNQPIVDEDSNAAFDIVYANAQGEKKAVSGLQVRLIRERRDYYWNWSESEGWQSQFDQKDLVEGEQTLDLNADETGKVSFPVEWGAYRLEVKAPNETVSSVRFWAGYSWQDNSDGSGAARPDRVTLKLDKANYRPGDTMKLHIAAPVAGKGYAMVESSDGPLWWQAIDVPAQGLELTIPVDKTWNRHDLYLSTLVVRPGDKSRSATPKRAVGLLHLPLGDDNRRLDLALESPAKMRPNQPLTVRVKASVKHGEMPKQINVLVSAVDSGVLNITDYATPDPWQAFFGQKRYGADIYDIYGQVIEGQGRLAALRFGGDGDDLTRGGKPPVNHANIIAQQAQPITLNEQGEGVVTLPIGDFNGELRVMAQAWTADDFGRGESKVVVAAPVIAELNMPRFLAGGDVSRLVLDVTNLTDRPQTLNIALAASGLLELLSQQPQPVNLAPGVRTTLFVPVRALEGFGEGEIQATISGLNLPGETLGAQHKQWQIGVRPAWPAQTVNSGIALAPGESWHVPEQHLANVSPATLQGQLLLSGKPPLNLARYIRELKAYPYGCLEQTTSGLFPALYTNAAQLQSLGITGDSDEKR
RAAVDIGISRILQMQRDNGGFALWDENGAEEPWLTAYAMDFLIRAGEQGYSVPPEAINRGNERLLRYLQDPGTMLIRYSDNTQASTFAAQAYAALVLARQQKAPLGALREIWERRSQAASGLPLMQLGIALNTMGDARRGEEAITLALNTPRQDERQWIADYGSSLRDNALMLSLLEENNLRPDAQNALLSSLSEQAFGQRWLSTQENNALFLAAHSRQASAGAWQAQTSLEAQPLSGDKALTRNLDADQLAALEVTNTGSQPLWLRLDSSGYPSSAPEPASNVLQIERQILGTDGQRKSLSSLRSGELVLVWLTVVADRNVPDALVVDLLPAGLELENQNLADSSASLPESGSEVQNLLNQMQQADIQYMEFRDDRFVAAVVVNEGQPVTLVYLARAVTPGTYQLAQPQVESMYAPQWRATGASEGLLIVTP* +>12 +MAKNRSRRLRKKMHIDEFQELGFSVAWRFPEGTSEEQIDKTVDDFINDVIEPNKLAFDGSGYLAWEGLICMQEIGKCTEEHQAIVRKWLEARNLEEVRTSELFDVWWD* +>13 +LKLIPFYLLALFSAASGATEINACKDLIGTWKTTADNPPYTMTILPPVEGCGEKCVKLNVQYELDVTHRNALYCHEGQEGVKGQGPMVIAFEGAYGGHAIGTYNRQLQLLWAGVIPKNKKGKWITKMENYWFRQVKAH* +>14 +MKKFYSCLPVFLLIGCAQVPLPSSVSKPVQQPGAQKEQLANANSIDECQSLPYVPSDLAKNKSLSNQNADNSASKNSAISSSIFCEKYKQTKEQALTFFQEHPQYMRSKEDEEQLMTEFKKVLLEPGSKNLSIYQTLLAAHERLQAL* +>15 +MRIKPDDNWRWYYDEEHDRMMLDLANGMLFRSRFSRKMLTPDAFCPTGFCVDDAALYFSFEEKCRDFELTKEQRAELVLNALVAIRYLKPQMPKSWHFVAHGEMWTPGTGDAASVWLSDTAEQVNLLVVEPGENAALCLLAQPGVVIAGRTMQLGDAIKIMNDRLKPQVHCHSFSLEQAV* +>16 +VLAFTLRFIKNKRYFAILAGALVIIAGLASQHAWSGNGLPQINGKALAALAKQHPVVVLFRHAERCDRSDNTCLSDSTGITVNGAQDARALGKAFSADIQNYNLYSSNTVRTIQSATWFSAGRSLTADKKMMDCGSGIYASINTLLKKSQNKNIVIFTHNHCLTYIAKNKRGVKFDPDYLNALVMYAENGKLFLDGEFVPG* +>17 +MGSNYIVIEGLEGAGKTTARDVVVETLEQLGIRNMIFTREPGGTQLAEKLRSLVLDIRSVGDEVITDKAEVLMFYAARVQLVETVIKPALAQGVWVIGDRHDLSTQAYQGGGRGIDQTMLATLRDAVLGDFRPDLTLYLDVTPEVGLKRARARGDLDRIEQESFDFFNRTRARYLELAAQDSRIRTIDATQPLDAVMRDIRATVTKWVQEQAA* +>18 +MKHIKKSVLVVLLTSHVAHASIVVGGTRLVFDGNNDESSINVENKDSKANLVQSWLSVADPQVTNKQVFIITPPLFRLDAGQKNSIRVIRSGAPLPADRESMYWLNIKGIPSIDDNASANRVEISINTQIKLIYRPPALTKSTPDSQSQQLKWQTAGDVITVNNPTPYYMNFASVTLNSHEVKSATFVPPKSSASFKLSSTAAPHGTVTWRLISDYGMSLEPHSGSF* diff --git a/tests/test_workflows.yml b/tests/test_workflows.yml index a16eb47..bd2ac83 100644 --- a/tests/test_workflows.yml +++ b/tests/test_workflows.yml @@ -58,7 +58,6 @@ - path: test_out/extract/G1/raw.extracted.seqs.fasta md5sum: 263d767c0503521372f8ea3c0689073a - path: 
test_out/search/G1/seq_store.json - md5sum: 9e33a22323f939371a203c47ebeede0d - path: test_out/report/conservative/G1/report.json md5sum: e048b379915dc91e93d048cd89c78ae2 - path: test_out/report/normal/G1/report.json @@ -77,7 +76,6 @@ - path: test_out/extract/G2/raw.extracted.seqs.fasta md5sum: a22c3896a3b2a5d8e0c0793c3848f0b2 - path: test_out/search/G2/seq_store.json - md5sum: 735c90328b01180800056f63e983c5a4 - path: test_out/report/conservative/G2/report.json md5sum: b5999c77be20fef1f681a620cb5b071c - path: test_out/report/normal/G2/report.json @@ -96,7 +94,6 @@ - path: test_out/extract/G3/raw.extracted.seqs.fasta md5sum: 263d767c0503521372f8ea3c0689073a - path: test_out/search/G3/seq_store.json - md5sum: 9a5b510de5cd2f5e454f7bf9dc10accf - path: test_out/report/conservative/G3/report.json md5sum: 30bf0e9d8198b05d639b4a3ab04f7cff - path: test_out/report/normal/G3/report.json @@ -115,7 +112,6 @@ - path: test_out/extract/G4/raw.extracted.seqs.fasta md5sum: a22c3896a3b2a5d8e0c0793c3848f0b2 - path: test_out/search/G4/seq_store.json - md5sum: 1c150a532cbb42efe4794130b5fb5f5d - path: test_out/report/conservative/G4/report.json md5sum: c4e56ec3a72f97a164dcdde217795434 - path: test_out/report/normal/G4/report.json @@ -134,7 +130,6 @@ - path: test_out/extract/G5/raw.extracted.seqs.fasta md5sum: 1fa0a1e6302e05ef9b131d71577e4be8 - path: test_out/search/G5/seq_store.json - md5sum: a0bab6959b2c9294292f4499d8c4563c - path: test_out/report/conservative/G5/report.json md5sum: 09a5009eedc1a898448078a54f8e28a8 - path: test_out/report/normal/G5/report.json @@ -153,7 +148,6 @@ - path: test_out/extract/G6/raw.extracted.seqs.fasta md5sum: 9c76d604cbf87f0ce649b0b3ebfc4854 - path: test_out/search/G6/seq_store.json - md5sum: def35eb2b6301b8ab6fb31ecaf995d78 - path: test_out/report/conservative/G6/report.json md5sum: a9bfc3bf4a32a181fa56f037a1265347 - path: test_out/report/normal/G6/report.json @@ -172,7 +166,6 @@ - path: test_out/extract/G7/raw.extracted.seqs.fasta md5sum: 
14814906e4a1dadde2e422fb84aef547 - path: test_out/search/G7/seq_store.json - md5sum: 8e8ed91f9baaffc0ab27a2a26df7a647 - path: test_out/report/conservative/G7/report.json md5sum: 16b7d2a1e6e8888b57bbc11cab682492 - path: test_out/report/normal/G7/report.json @@ -191,7 +184,6 @@ - path: test_out/extract/G8/raw.extracted.seqs.fasta md5sum: ca09751ff5ed7b3f5fec1a6d41696a0b - path: test_out/search/G8/seq_store.json - md5sum: 95673d95dcd5bd54afc81f8788e5ef97 - path: test_out/report/conservative/G8/report.json md5sum: 312756f9eaf90483c95252daa92bea65 - path: test_out/report/normal/G8/report.json @@ -210,7 +202,6 @@ - path: test_out/extract/G9/raw.extracted.seqs.fasta md5sum: 3c98833aea5abc427222c9ef1ea0be61 - path: test_out/search/G9/seq_store.json - md5sum: c502a31310f6584a10b4378c5a1c2d82 - path: test_out/report/conservative/G9/report.json md5sum: 55ab5181170966ead133c425fded1060 - path: test_out/report/normal/G9/report.json @@ -229,7 +220,6 @@ - path: test_out/extract/G10/raw.extracted.seqs.fasta md5sum: 3b8981e69b40ee7118d8356c89280bcb - path: test_out/search/G10/seq_store.json - md5sum: cd28ea213bd681c7abba59e6cf68bdb8 - path: test_out/report/conservative/G10/report.json md5sum: c22f72415ec2eac72c3260edfc099dca - path: test_out/report/normal/G10/report.json @@ -248,7 +238,6 @@ - path: test_out/extract/G11/raw.extracted.seqs.fasta md5sum: e2bdd2194087059f1b7a7ac664d3fc3f - path: test_out/search/G11/seq_store.json - md5sum: 439b41382a87960e7d123afc060aaae8 - path: test_out/report/conservative/G11/report.json md5sum: 2a36a4242d2742d7c0cfac39eefd482c - path: test_out/report/normal/G11/report.json @@ -266,8 +255,7 @@ files: - path: test_out/extract/G12/raw.extracted.seqs.fasta md5sum: 8eb5977e87e795e3dbc50d98af4d2b45 - - path: test_out/search/G12/seq_store. 
- md5sum: ba3cd7d9e5e243b85cf5cb8347fb1c3f + - path: test_out/search/G12/seq_store.json - path: test_out/report/conservative/G12/report.json md5sum: 05cb031b16a5e40299d0f7ccefe7ac8c - path: test_out/report/normal/G12/report.json @@ -286,7 +274,6 @@ - path: test_out/extract/G13/raw.extracted.seqs.fasta md5sum: f5abee1fa5628d3ccffbe2b5d03e677d - path: test_out/search/G13/seq_store.json - md5sum: c5f24a2fcdc2b118485db8373f239ac5 - path: test_out/report/conservative/G13/report.json md5sum: 6b2d0b51683274983ea117d84a1113ee - path: test_out/report/normal/G13/report.json @@ -305,7 +292,6 @@ - path: test_out/extract/G14/raw.extracted.seqs.fasta md5sum: 0c1255c5ed4ee62d4e21eef8e3dafc06 - path: test_out/search/G14/seq_store.json - md5sum: 35aab22a5e554b3db63940b873e267b9 - path: test_out/report/conservative/G14/report.json md5sum: 1de0c6f82e484fb6a14a438e256b058b - path: test_out/report/normal/G14/report.json