phac-nml · mattheww95 · May 28, 2024 · May 28, 2024 · May 28, 2024
diff --git a/locidex/classes/seq_intake.py b/locidex/classes/seq_intake.py
@@ -165,7 +165,7 @@ def process_fasta(self, seq_data = []) -> list[SeqObject]:
                 if self.skip_trans:
                     aa_seq = ''
                 else:
-                    aa_seq = six_frame_translation(dna_seq,self.translation_table)[0][0]
+                    aa_seq = six_frame_translation(dna_seq,self.translation_table)[0]  # NOTE(review): was [0][0]; confirm six_frame_translation returns a flat list of str, otherwise aa_seq is a list and aa_len below counts frames
 
                 aa_hash = calc_md5([aa_seq])[0]
                 aa_len = len(aa_seq)
@@ -211,7 +211,7 @@ def process_seq_hash(self,sequences) -> list[SeqObject]:
                 if self.skip_trans:
                     aa_seq = ''
                 else:
-                    aa_seq = six_frame_translation(dna_seq, self.translation_table)[0][0]
+                    aa_seq = six_frame_translation(dna_seq, self.translation_table)[0]  # NOTE(review): was [0][0]; confirm six_frame_translation returns a flat list of str, otherwise aa_seq is a list and aa_len below counts frames
                 aa_hash = calc_md5([aa_seq])[0]
                 aa_len = len(aa_seq)
             else:

diff --git a/locidex/extract.py b/locidex/extract.py
@@ -101,7 +101,7 @@ def run_extract(config):
 
     if not mode in EXTRACT_MODES:
         logger.critical('Provided mode for allele extraction is not valid: {}, needs to be one of ({})'.format(mode, ", ".join(EXTRACT_MODES)))
-        sys.exit()
+        raise ValueError('Extraction  mode is not valid: {}, needs to be one of ({})'.format(mode, ", ".join(EXTRACT_MODES)))
 
     if sample_name == None:
         sample_name = os.path.basename(input_fasta)
@@ -279,9 +279,7 @@ def run(cmd_args=None):
         cmd_args = parser.parse_args()
 
     analysis_parameters = vars(cmd_args)
-
     analysis_parameters = check_db_groups(analysis_params=analysis_parameters, cmd_args=cmd_args)
-
     config_file = cmd_args.config
 
     config = {}

diff --git a/locidex/main.py b/locidex/main.py
@@ -28,26 +28,26 @@ def main(argv=None):
     for k, v in tasks.items():
         format_parser = sub_parsers.add_parser(k, description=v[help_msg], help=v[help_msg])
         v[module_idx].add_args(format_parser)
-
+        
     args = parser.parse_args(argv)
     if args.command is None:
         parser.print_help()
         sys.exit()
-    logger.info("Running {}".format(args.command))
-    tasks[args.command][module_idx].run(args)
-    logger.info("Finished: {}".format(args.command))
-
 
-# call main function
-if __name__ == '__main__':
-    error_file = "error.txt"
+    error_file = "errors.txt"
     try:
-        main()
+        logger.info("Running {}".format(args.command))
+        tasks[args.command][module_idx].run(args)
+        logger.info("Finished: {}".format(args.command))
     except Exception as e:
         with open(error_file, "w") as f:
             f.write(traceback.format_exc())
         error_number = e.errno if hasattr(e, "errno") else -1
         logger.critical("Program exited with errors, please review logs. For the full traceback please see file: {}".format(error_file))
         raise SystemExit(error_number)
     else:
-        sys.exit("Program finished without errors.")
+        logger.info("Program finished without errors.")
+
+# call main function
+if __name__ == '__main__':
+    main()
diff --git a/locidex/search.py b/locidex/search.py
@@ -90,14 +90,16 @@ def create_outputs(output_dir: Path, db_data: DBData, blast_params: dict, config
 
     This function will have some needed clean up once the cli is tidied
     """
-    hsps_out = "hsps.txt" #? Need to follow up on what hsps stands for
+    hsps_out = "hsps.txt" #? Need to follow up on what hsps stands for (presumably BLAST high-scoring segment pairs); this BLAST output needs to be included
     label_col = 'index'
-    query_fasta = output_dir.joinpath("queries.fasta")
-    output_hsps = output_dir.joinpath(hsps_out)
-    if not output_dir.exists() or not output_dir.is_dir():
-        os.makedirs(output_dir, 0o755)
+    output_directory = output_dir.joinpath(configuration.output_dir)
+    query_fasta = output_directory.joinpath("queries.fasta")
+    output_hsps = output_directory.joinpath(hsps_out)
+    if not output_directory.exists() or not output_directory.is_dir():
+        os.makedirs(output_directory, 0o755, exist_ok=True) # exist_ok=True is acceptable here: the program is expected to error out earlier rather than clobber an existing directory
 
     output_file = create_fasta_from_df(filtered_df, label_col=label_col, seq_col=configuration.seq_col, out_file=query_fasta)
+    #! Blast search output columns should be showing up
     search_data = BlastSearch(db_data, output_file, blast_params, configuration.program, BlastColumns._fields, filter_options)
     searched_df = search_data.get_blast_data(configuration.db_dir, output_hsps)
     return searched_df

diff --git a/locidex/utils.py b/locidex/utils.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from locidex.manifest import ManifestItem
 from Bio.Seq import Seq
-from typing import Dict, FrozenSet, Optional
+from typing import Dict, FrozenSet, Optional, List
 from locidex.constants import NT_SUB, PROTEIN_ALPHA, DNA_ALPHA, OPTION_GROUPS, FILE_TYPES
 import locidex.manifest as manifest 
 
@@ -80,7 +80,7 @@ def translate_dna(dna_seq,trans_table=11):
         s = ''.join(list(dna_seq)[:-r])
     return str(Seq(s).translate(table=trans_table))
 
-def six_frame_translation(dna_seq,trans_table):
+def six_frame_translation(dna_seq,trans_table) -> List[str]:
     fwd = []
     rev = []
     for i in range(0,3):
@@ -132,8 +132,8 @@ def write_seq_list(seqs,output_file,format='json',seq_type='dna',seq_id_key='ind
 
 def write_seq_dict(data,output_file):
     with open(output_file, 'w') as oh:
-        for id in data:
-            oh.write(f">{id}\n{data[id]}\n")
+        for fid, seq in data.items():  # each entry becomes a ">key" header line followed by its value on the next line
+            oh.write(f">{fid}\n{seq}\n")
     return output_file