From e860602879244dc381b78c216f97be744c2bb4c6 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 28 May 2024 14:56:17 -0500 Subject: [PATCH 1/2] fixed aa storage in six frame translation --- locidex/classes/seq_intake.py | 4 ++-- locidex/extract.py | 4 +--- locidex/main.py | 22 +++++++++++----------- locidex/search.py | 12 +++++++----- locidex/utils.py | 8 ++++---- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/locidex/classes/seq_intake.py b/locidex/classes/seq_intake.py index aeaaea7..1c5ffdc 100644 --- a/locidex/classes/seq_intake.py +++ b/locidex/classes/seq_intake.py @@ -165,7 +165,7 @@ def process_fasta(self, seq_data = []) -> list[SeqObject]: if self.skip_trans: aa_seq = '' else: - aa_seq = six_frame_translation(dna_seq,self.translation_table)[0][0] + aa_seq = six_frame_translation(dna_seq,self.translation_table)[0] aa_hash = calc_md5([aa_seq])[0] aa_len = len(aa_seq) @@ -211,7 +211,7 @@ def process_seq_hash(self,sequences) -> list[SeqObject]: if self.skip_trans: aa_seq = '' else: - aa_seq = six_frame_translation(dna_seq, self.translation_table)[0][0] + aa_seq = six_frame_translation(dna_seq, self.translation_table)[0] aa_hash = calc_md5([aa_seq])[0] aa_len = len(aa_seq) else: diff --git a/locidex/extract.py b/locidex/extract.py index 8fa3832..fa4425c 100644 --- a/locidex/extract.py +++ b/locidex/extract.py @@ -101,7 +101,7 @@ def run_extract(config): if not mode in EXTRACT_MODES: logger.critical('Provided mode for allele extraction is not valid: {}, needs to be one of ({})'.format(mode, ", ".join(EXTRACT_MODES))) - sys.exit() + raise ValueError('Extraction mode is not valid: {}, needs to be one of ({})'.format(mode)) if sample_name == None: sample_name = os.path.basename(input_fasta) @@ -279,9 +279,7 @@ def run(cmd_args=None): cmd_args = parser.parse_args() analysis_parameters = vars(cmd_args) - analysis_parameters = check_db_groups(analysis_params=analysis_parameters, cmd_args=cmd_args) - config_file = cmd_args.config config = {} diff --git a/locidex/main.py b/locidex/main.py index f3a8a33..3d93e3f 100644 --- a/locidex/main.py +++ b/locidex/main.py @@ -28,26 +28,26 @@ def main(argv=None): for k, v in tasks.items(): format_parser = sub_parsers.add_parser(k, description=v[help_msg], help=v[help_msg]) v[module_idx].add_args(format_parser) - + args = parser.parse_args(argv) if args.command is None: parser.print_help() sys.exit() - logger.info("Running {}".format(args.command)) - tasks[args.command][module_idx].run(args) - logger.info("Finished: {}".format(args.command)) - -# call main function -if __name__ == '__main__': - error_file = "error.txt" + error_file = "errors.txt" try: - main() + logger.info("Running {}".format(args.command)) + tasks[args.command][module_idx].run(args) + logger.info("Finished: {}".format(args.command)) except Exception as e: with open(error_file, "w") as f: f.write(traceback.format_exc()) error_number = e.errno if hasattr(e, "errno") else -1 logger.critical("Program exited with errors, please review logs. For the full traceback please see file: {}".format(error_file)) - raise SystemExit(error_number) + SystemExit(error_number) else: - sys.exit("Program finished without errors.") \ No newline at end of file + sys.exit("Program finished without errors.") + +# call main function +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/locidex/search.py b/locidex/search.py index 57f896f..e46cb64 100644 --- a/locidex/search.py +++ b/locidex/search.py @@ -90,14 +90,16 @@ def create_outputs(output_dir: Path, db_data: DBData, blast_params: dict, config This function will have some needed clean up once the cli is tidied """ - hsps_out = "hsps.txt" #? Need to follow up on what hsps stands for + hsps_out = "hsps.txt" #? Need to follow up on what hsps stands for, hsps is a blast that needs to included label_col = 'index' - query_fasta = output_dir.joinpath("queries.fasta") - output_hsps = output_dir.joinpath(hsps_out) - if not output_dir.exists() or not output_dir.is_dir(): - os.makedirs(output_dir, 0o755) + output_directory = output_dir.joinpath(configuration.output_dir) + query_fasta = output_directory.joinpath("queries.fasta") + output_hsps = output_directory.joinpath(hsps_out) + if not output_directory.exists() or not output_directory.is_dir(): + os.makedirs(output_directory, 0o755, exist_ok=True) # exist okay is true, as the program will error out before it clobbers the directory output_file = create_fasta_from_df(filtered_df, label_col=label_col, seq_col=configuration.seq_col, out_file=query_fasta) + #! Blast search output columns should be showing up search_data = BlastSearch(db_data, output_file, blast_params, configuration.program, BlastColumns._fields, filter_options) searched_df = search_data.get_blast_data(configuration.db_dir, output_hsps) return searched_df diff --git a/locidex/utils.py b/locidex/utils.py index 23105e8..df06ee9 100644 --- a/locidex/utils.py +++ b/locidex/utils.py @@ -6,7 +6,7 @@ from pathlib import Path from locidex.manifest import ManifestItem from Bio.Seq import Seq -from typing import Dict, FrozenSet, Optional +from typing import Dict, FrozenSet, Optional, List from locidex.constants import NT_SUB, PROTEIN_ALPHA, DNA_ALPHA, OPTION_GROUPS, FILE_TYPES import locidex.manifest as manifest @@ -80,7 +80,7 @@ def translate_dna(dna_seq,trans_table=11): s = ''.join(list(dna_seq)[:-r]) return str(Seq(s).translate(table=trans_table)) -def six_frame_translation(dna_seq,trans_table): +def six_frame_translation(dna_seq,trans_table) -> List[str]: fwd = [] rev = [] for i in range(0,3): @@ -132,8 +132,8 @@ def write_seq_list(seqs,output_file,format='json',seq_type='dna',seq_id_key='ind def write_seq_dict(data,output_file): with open(output_file, 'w') as oh: - for id in data: - oh.write(f">{id}\n{data[id]}\n") + for fid, seq in data.items(): + oh.write(f">{fid}\n{seq}\n") return output_file From e061f4dd66fc27dfbc0209cca6f0ba5d4a820d02 Mon Sep 17 00:00:00 2001 From: Matthew Wells Date: Tue, 28 May 2024 15:32:10 -0500 Subject: [PATCH 2/2] fixed aa storage in six frame translation --- locidex/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/locidex/main.py b/locidex/main.py index 3d93e3f..963995d 100644 --- a/locidex/main.py +++ b/locidex/main.py @@ -46,7 +46,7 @@ def main(argv=None): logger.critical("Program exited with errors, please review logs. For the full traceback please see file: {}".format(error_file)) SystemExit(error_number) else: - sys.exit("Program finished without errors.") + logger.info("Program finished without errors.") # call main function if __name__ == '__main__':