Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update/outputs #24

Merged
merged 2 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions locidex/classes/seq_intake.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def process_fasta(self, seq_data = []) -> list[SeqObject]:
if self.skip_trans:
aa_seq = ''
else:
aa_seq = six_frame_translation(dna_seq,self.translation_table)[0][0]
aa_seq = six_frame_translation(dna_seq,self.translation_table)[0]

aa_hash = calc_md5([aa_seq])[0]
aa_len = len(aa_seq)
Expand Down Expand Up @@ -211,7 +211,7 @@ def process_seq_hash(self,sequences) -> list[SeqObject]:
if self.skip_trans:
aa_seq = ''
else:
aa_seq = six_frame_translation(dna_seq, self.translation_table)[0][0]
aa_seq = six_frame_translation(dna_seq, self.translation_table)[0]
aa_hash = calc_md5([aa_seq])[0]
aa_len = len(aa_seq)
else:
Expand Down
4 changes: 1 addition & 3 deletions locidex/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def run_extract(config):

if not mode in EXTRACT_MODES:
logger.critical('Provided mode for allele extraction is not valid: {}, needs to be one of ({})'.format(mode, ", ".join(EXTRACT_MODES)))
sys.exit()
raise ValueError('Extraction mode is not valid: {}, needs to be one of ({})'.format(mode))

if sample_name == None:
sample_name = os.path.basename(input_fasta)
Expand Down Expand Up @@ -279,9 +279,7 @@ def run(cmd_args=None):
cmd_args = parser.parse_args()

analysis_parameters = vars(cmd_args)

analysis_parameters = check_db_groups(analysis_params=analysis_parameters, cmd_args=cmd_args)

config_file = cmd_args.config

config = {}
Expand Down
22 changes: 11 additions & 11 deletions locidex/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,26 +28,26 @@ def main(argv=None):
for k, v in tasks.items():
format_parser = sub_parsers.add_parser(k, description=v[help_msg], help=v[help_msg])
v[module_idx].add_args(format_parser)

args = parser.parse_args(argv)
if args.command is None:
parser.print_help()
sys.exit()
logger.info("Running {}".format(args.command))
tasks[args.command][module_idx].run(args)
logger.info("Finished: {}".format(args.command))


# call main function
if __name__ == '__main__':
error_file = "error.txt"
error_file = "errors.txt"
try:
main()
logger.info("Running {}".format(args.command))
tasks[args.command][module_idx].run(args)
logger.info("Finished: {}".format(args.command))
except Exception as e:
with open(error_file, "w") as f:
f.write(traceback.format_exc())
error_number = e.errno if hasattr(e, "errno") else -1
logger.critical("Program exited with errors, please review logs. For the full traceback please see file: {}".format(error_file))
raise SystemExit(error_number)
SystemExit(error_number)
else:
sys.exit("Program finished without errors.")
logger.info("Program finished without errors.")

# call main function
if __name__ == '__main__':
main()
12 changes: 7 additions & 5 deletions locidex/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,16 @@ def create_outputs(output_dir: Path, db_data: DBData, blast_params: dict, config

This function will need some clean-up once the CLI is tidied
"""
hsps_out = "hsps.txt" #? Need to follow up on what hsps stands for
hsps_out = "hsps.txt" #? Need to follow up on what hsps stands for (presumably BLAST high-scoring segment pairs); hsps is BLAST output that needs to be included
label_col = 'index'
query_fasta = output_dir.joinpath("queries.fasta")
output_hsps = output_dir.joinpath(hsps_out)
if not output_dir.exists() or not output_dir.is_dir():
os.makedirs(output_dir, 0o755)
output_directory = output_dir.joinpath(configuration.output_dir)
query_fasta = output_directory.joinpath("queries.fasta")
output_hsps = output_directory.joinpath(hsps_out)
if not output_directory.exists() or not output_directory.is_dir():
os.makedirs(output_directory, 0o755, exist_ok=True) # exist_ok=True is acceptable here, as the program will error out before it clobbers the directory

output_file = create_fasta_from_df(filtered_df, label_col=label_col, seq_col=configuration.seq_col, out_file=query_fasta)
#! Blast search output columns should be showing up
search_data = BlastSearch(db_data, output_file, blast_params, configuration.program, BlastColumns._fields, filter_options)
searched_df = search_data.get_blast_data(configuration.db_dir, output_hsps)
return searched_df
Expand Down
8 changes: 4 additions & 4 deletions locidex/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pathlib import Path
from locidex.manifest import ManifestItem
from Bio.Seq import Seq
from typing import Dict, FrozenSet, Optional
from typing import Dict, FrozenSet, Optional, List
from locidex.constants import NT_SUB, PROTEIN_ALPHA, DNA_ALPHA, OPTION_GROUPS, FILE_TYPES
import locidex.manifest as manifest

Expand Down Expand Up @@ -80,7 +80,7 @@ def translate_dna(dna_seq,trans_table=11):
s = ''.join(list(dna_seq)[:-r])
return str(Seq(s).translate(table=trans_table))

def six_frame_translation(dna_seq,trans_table):
def six_frame_translation(dna_seq,trans_table) -> List[str]:
fwd = []
rev = []
for i in range(0,3):
Expand Down Expand Up @@ -132,8 +132,8 @@ def write_seq_list(seqs,output_file,format='json',seq_type='dna',seq_id_key='ind

def write_seq_dict(data,output_file):
with open(output_file, 'w') as oh:
for id in data:
oh.write(f">{id}\n{data[id]}\n")
for fid, seq in data.items():
oh.write(f">{fid}\n{seq}\n")
return output_file


Expand Down
Loading