Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding/logs #21

Merged
merged 7 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ __pycache__
.vscode
.pytest_cache
tmp
.coverage*
.coverage*
error.txt
14 changes: 7 additions & 7 deletions locidex/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from locidex.version import __version__
from locidex.constants import DBFiles
from locidex.classes import run_command
from locidex.constants import DBConfig, MetadataFields
from locidex.constants import DBConfig, MetadataFields, raise_file_not_found_e
from locidex.classes.blast import BlastMakeDB
from locidex.manifest import DBData
import getpass

import errno
import logging
import sys

Expand Down Expand Up @@ -71,7 +71,7 @@ def init_dir(self, d: Path):
d.mkdir(parents=True, exist_ok=self.force, mode=0o755)
except FileExistsError:
logger.critical("Database file {} already exists. To overwrite please run with --force".format(d))
sys.exit(17)
raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), str(d))
return True


Expand Down Expand Up @@ -163,14 +163,14 @@ def run(cmd_args=None):
)

if not os.path.isfile(input_file):
print(f'Error {input_file} does not exist, please check path and try again')
sys.exit()
logger.critical(f'Error {input_file} does not exist, please check path and try again')
raise_file_not_found_e(input_file, logger)

obj = locidex_build(Path(input_file), Path(outdir),config=config,seq_columns={'nucleotide':'dna_seq','protein':'aa_seq'},force=force)

if obj.status == False:
print(f'Error something went wrong building the db, check error messages {obj.messages}')
sys.exit()
logger.critical(f'Error something went wrong building the db.')
raise ValueError("Something went wrong building db.")

run_data['analysis_end_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
with open(os.path.join(outdir,DBFiles.config_file),"w") as oh:
Expand Down
3 changes: 2 additions & 1 deletion locidex/classes/blast.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def makeblastdb(self):
logger.debug("Blast makedb stdout: {}".format(stdout))
if stderr:
logger.debug("Blast makedb stderr: {}".format(stderr))

self.checkblastdb(self.output_db_path)
return self.output_db_path

Expand Down Expand Up @@ -142,6 +142,7 @@ def read_hit_table(self, blast_data):
def _check_blast_files(self, db_dir: Path, extensions: frozenset):
    """
    Verify that a blast database directory contains every required file type.

    db_dir: directory whose entries are inspected (only file suffixes are compared)
    extensions: suffixes that must each appear on at least one entry of db_dir

    Raises a ValueError naming the required suffixes that were not found.
    """
    logger.info("Verifying blast database: {}".format(str(db_dir)))
    extensions_ = {i.suffix for i in db_dir.iterdir()}
    # Report what is absent from the directory (required minus present), not
    # the unrelated extra suffixes that happen to be present.
    missing = sorted(ext for ext in extensions if ext not in extensions_)
    if missing:
        raise ValueError("Missing required blast files. {}".format(missing))
Expand Down
8 changes: 7 additions & 1 deletion locidex/classes/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,14 @@
from functools import partial
import os
from locidex.utils import calc_md5, slots
import logging
from locidex.constants import raise_file_not_found_e
from dataclasses import dataclass
from pathlib import Path
import sys

# Module-level logger. ``stream`` is the basicConfig argument that selects the
# output stream; ``filemode`` is a file-open mode string that only applies
# together with ``filename``. basicConfig does nothing if the root logger
# already has handlers configured.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

@dataclass
class Fasta:
Expand All @@ -24,7 +30,7 @@ class ParseFasta:
def __init__(self, input_file: Path,parse_def=False,seq_type=None,delim="|"):
self.input_file = input_file
if not self.input_file.exists():
raise FileNotFoundError("Input file: {} not found.".format(self.input_file))
raise_file_not_found_e(self.input_file, logger)

self.delim = delim
self.seq_type = seq_type
Expand Down
2 changes: 1 addition & 1 deletion locidex/classes/gbk.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from locidex.utils import revcomp,calc_md5

class parse_gbk:
status = True
messages = []

def __init__(self,input_file):
self.input_file= input_file
self.status = True
if not os.path.isfile(self.input_file):
self.messages.append(f'Error {self.input_file} does not exist')
self.status = False
Expand Down
9 changes: 7 additions & 2 deletions locidex/classes/seq_intake.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@
from locidex.classes.fasta import ParseFasta
from locidex.utils import guess_alphabet, calc_md5, six_frame_translation, slots
from locidex.classes.prodigal import gene_prediction
from locidex.constants import DNA_AMBIG_CHARS, DNA_IUPAC_CHARS, CharacterConstants, DBConfig
from locidex.constants import DNA_AMBIG_CHARS, DNA_IUPAC_CHARS, CharacterConstants, DBConfig, raise_file_not_found_e
import logging
from typing import NamedTuple, Optional, List
from dataclasses import dataclass, asdict

# Module-level logger. ``stream`` is the basicConfig argument that selects the
# output stream; ``filemode`` is a file-open mode string that only applies
# together with ``filename``. basicConfig does nothing if the root logger
# already has handlers configured.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stderr, level=logging.INFO)

@dataclass
class HitFilters:
min_dna_len: int
Expand Down Expand Up @@ -50,7 +54,8 @@ class seq_intake:

def __init__(self,input_file,file_type,feat_key='CDS',translation_table=11,perform_annotation=False,num_threads=1,skip_trans=False):
if not input_file.exists():
raise FileNotFoundError("File {} does not exist.".format(input_file))
logger.critical("Seq_intake file {} could not be found".format(input_file))
raise_file_not_found_e(input_file, logger)

self.input_file = input_file
self.file_type = file_type
Expand Down
28 changes: 10 additions & 18 deletions locidex/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

from dataclasses import dataclass, asdict, fields
import pathlib
import logging
import os
import errno
from typing import Any, Union, NamedTuple, Optional

DNA_AMBIG_CHARS = ['b', 'd', 'e', 'f', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 'u', 'v', 'w', 'x',
Expand All @@ -23,24 +26,13 @@ class CharacterConstants:
stop_codon: str = "*"


#BLAST_TABLE_COLS = '''
#qseqid
#sseqid
#qlen
#slen
#qstart
#qend
#sstart
#send
#length
#mismatch
#pident
#qcovhsp
#qcovs
#sstrand
#evalue
#bitscore
#'''.strip().split('\n')
def raise_file_not_found_e(file: Union[str, pathlib.Path], logger: logging.Logger):
    """
    Log a critical message about a missing file, then raise FileNotFoundError.

    file: path that was expected to exist
    logger: logger used to record the failure before raising

    The raised error carries errno.ENOENT, the matching OS error string and
    the path rendered as a string.
    """
    missing_path = str(file)
    logger.critical("File supplied does not exist: {}".format(missing_path))
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), missing_path)


class BlastColumns(NamedTuple):
qseqid: str
Expand Down
28 changes: 17 additions & 11 deletions locidex/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,21 @@
from datetime import datetime
import numpy as np
import pandas as pd
import logging
import errno
from locidex.classes.extractor import extractor

from locidex.classes.blast import BlastSearch, FilterOptions, BlastMakeDB
from locidex.manifest import DBData
from locidex.classes.db import search_db_conf, db_config
from locidex.classes.seq_intake import seq_intake, seq_store
from locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BlastColumns, BlastCommands, DBConfig, DB_EXPECTED_FILES, NT_SUB, EXTRACT_MODES, OPTION_GROUPS
from locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BlastColumns, BlastCommands, DBConfig, DB_EXPECTED_FILES, EXTRACT_MODES, raise_file_not_found_e
from locidex.version import __version__
from locidex.classes.aligner import perform_alignment, aligner
from locidex.utils import check_db_groups

# Module-level logger. ``stream`` is the basicConfig argument that selects the
# output stream; ``filemode`` is a file-open mode string that only applies
# together with ``filename``. basicConfig does nothing if the root logger
# already has handlers configured.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stderr, level=logging.INFO)

def add_args(parser=None):
if parser is None:
parser = ArgumentParser(
Expand Down Expand Up @@ -96,7 +100,7 @@ def run_extract(config):


if not mode in EXTRACT_MODES:
print(f'Provided mode for allele extraction is not valid: {mode}, needs to be one of (snps, trim, extend, raw)')
logger.critical('Provided mode for allele extraction is not valid: {}, needs to be one of ({})'.format(mode, ", ".join(EXTRACT_MODES)))
sys.exit()

if sample_name == None:
Expand All @@ -117,25 +121,26 @@ def run_extract(config):

if format is None or format not in FILE_TYPES:
if format is None:
print(f'Could not guess format for {input_fasta}')
logger.critical(f'Could not guess format for {input_fasta}')
raise ValueError("Could not guess input type for: {}".format(str(input_fasta)))
else:
print(f'Format for query file must be one of {list(FILE_TYPES.keys())}, you supplied {format}')
sys.exit()
logger.critical(f'Format for query file must be one of {list(FILE_TYPES.keys())}, you supplied {format}')
raise ValueError(f'Format for query file must be one of {list(FILE_TYPES.keys())}, you supplied {format}')

seq_obj = seq_intake(input_fasta, format, 'source', translation_table, perform_annotation=False,skip_trans=True)

# Validate database is valid
db_database_config = search_db_conf(db_dir, DB_EXPECTED_FILES, DBConfig._keys())
if db_database_config.status == False:
print(f'There is an issue with provided db directory: {db_dir}\n {db_database_config.messages}')
sys.exit()
logger.critical(f'There is an issue with provided db directory: {db_dir}\n {db_database_config.messages}')
raise ValueError("There is an issue with the provided database: {}".format(db_dir))

metadata_path = db_database_config.meta_file_path
metadata_obj = db_config(metadata_path, ['meta', 'info'])

if os.path.isdir(outdir) and not force:
print(f'Error {outdir} exists, if you would like to overwrite, then specify --force')
sys.exit()
logger.critical(f'Error {outdir} exists, if you would like to overwrite, then specify --force')
raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), str(outdir))

db_path = os.path.join(outdir, 'blast_db')

Expand Down Expand Up @@ -176,7 +181,8 @@ def run_extract(config):
#nt_db = Path("{}.fasta".format(blast_database_paths['nucleotide']))
nt_db = Path("{}.fasta".format(db_data.nucleotide_blast_db))
if not nt_db.exists():
raise FileNotFoundError("Could not find nucleotide database: {}".format(nt_db))
logger.critical("Could not find file: {}".format(nt_db))
raise_file_not_found_e(nt_db, logger)

filter_options = {
'evalue': FilterOptions(min=None, max=min_evalue, include=None),
Expand Down
Empty file removed locidex/fetch.py
Empty file.
23 changes: 15 additions & 8 deletions locidex/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,20 @@
from mimetypes import guess_type
from dataclasses import dataclass
from typing import List, Tuple

import logging
import errno
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from pyrodigal import GeneFinder

from locidex.constants import FILE_TYPES, LocidexDBHeader, CharacterConstants
from locidex.constants import FILE_TYPES, LocidexDBHeader, CharacterConstants, raise_file_not_found_e
from locidex.utils import six_frame_translation, revcomp, calc_md5
from locidex.version import __version__

# Module-level logger. ``stream`` is the basicConfig argument that selects the
# output stream; ``filemode`` is a file-open mode string that only applies
# together with ``filename``. basicConfig does nothing if the root logger
# already has handlers configured.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stderr, level=logging.INFO)

class locidex_format:

input_type = None
Expand Down Expand Up @@ -80,6 +84,7 @@ def set_input_type(self):
elif os.path.isdir(self.input):
self.input_type = self.__dir_input
else:
logger.critical("Could not determine input type for: {}".format(self.input))
raise AttributeError("Unknown input type could not be determined for: {}".format(self.input))

def get_dir_files(self, input_dir):
Expand Down Expand Up @@ -220,19 +225,21 @@ def run(cmd_args=None):
run_data['parameters'] = vars(cmd_args)

if os.path.isdir(outdir) and not force:
print(f'Error {outdir} exists, if you would like to overwrite, then specify --force')
sys.exit()
logger.critical(f'Error {outdir} exists, if you would like to overwrite, then specify --force')
raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), str(outdir))


if not os.path.isdir(outdir):
os.makedirs(outdir, 0o755)

if not os.path.isdir(input) and not os.path.isfile(input):
print(f'Error {input} does not exist as a file or directory')
sys.exit()

logger.critical(f'Error {input} does not exist as a file or directory')
raise_file_not_found_e(input, logger)

logger.info("Beginning format operation.")
obj = locidex_format(input=input,header=LocidexDBHeader._fields,is_protein=is_coding,min_len_frac=min_len_frac,max_len_frac=max_len_frac, min_ident_perc=min_ident,
min_cov_perc=min_match_cov,trans_table=trans_table,valid_ext=FILE_TYPES['fasta'])

logger.info("Finished format.")
run_data['result_file'] = os.path.join(outdir,"locidex.txt")
pd.DataFrame.from_dict(obj.data,orient='index').to_csv(run_data['result_file'],sep="\t",index=False,header=True)

Expand Down
17 changes: 16 additions & 1 deletion locidex/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@

import sys
import argparse
import traceback
import logging
from . import format, extract, report, merge, search, build, manifest

# Module-level logger. ``stream`` is the basicConfig argument that selects the
# output stream; ``filemode`` is a file-open mode string that only applies
# together with ``filename``. basicConfig does nothing if the root logger
# already has handlers configured.
logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

tasks = {
'search': (search, 'Query set of Loci/Genes against a database to produce a sequence store for downstream processing'),
'extract': (extract, 'Extract loci from a genome based on a locidex database'),
Expand Down Expand Up @@ -33,4 +38,14 @@ def main(argv=None):

# call main function
if __name__ == '__main__':
    # File that receives the full traceback when the run fails.
    error_file = "error.txt"
    try:
        main()
    except Exception as e:
        with open(error_file, "w") as f:
            f.write(traceback.format_exc())
        # An OSError built without an errno exposes ``errno`` as None, and
        # SystemExit(None) means exit status 0. Fall back to -1 unless a
        # non-zero integer code is available, so failures never look like
        # success to the calling shell.
        error_number = getattr(e, "errno", None)
        if not isinstance(error_number, int) or error_number == 0:
            error_number = -1
        logger.critical("Program exited with errors, please review logs. For the full traceback please see file: {}".format(error_file))
        raise SystemExit(error_number)
    else:
        # sys.exit(<str>) prints the string to stderr and exits with status 1,
        # so report success through the logger and exit with status 0.
        logger.info("Program finished without errors.")
        sys.exit(0)
Loading
Loading