Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update/outputs #23

Merged
merged 6 commits on
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions locidex/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,7 @@ def run(cmd_args=None):
input_file = cmd_args.input_file
outdir = cmd_args.outdir
force = cmd_args.force
run_data = dict()
run_data['analysis_start_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
run_data['parameters'] = vars(cmd_args)


config = DBConfig(
db_name=cmd_args.name,
Expand All @@ -162,6 +160,12 @@ def run(cmd_args=None):
db_date=datetime.now().strftime("%Y/%d/%m"),
)

run_params = vars(cmd_args)
run_params = run_params | config.to_dict()
run_data = dict()
run_data['analysis_start_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
run_data['parameters'] = vars(cmd_args)

if not os.path.isfile(input_file):
logger.critical(f'Error {input_file} does not exist, please check path and try again')
raise_file_not_found_e(input_file, logger)
Expand Down
3 changes: 0 additions & 3 deletions locidex/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,6 @@ class ManifestFields:
db_path: str = "path"
config_data: str = "config"

SEARCH_RUN_DATA = {

}


DB_EXPECTED_FILES = {
Expand Down
5 changes: 3 additions & 2 deletions locidex/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from locidex.manifest import DBData
from locidex.classes.db import search_db_conf, db_config
from locidex.classes.seq_intake import seq_intake, seq_store
from locidex.constants import SEARCH_RUN_DATA, FILE_TYPES, BlastColumns, BlastCommands, DBConfig, DB_EXPECTED_FILES, EXTRACT_MODES, raise_file_not_found_e
from locidex.constants import FILE_TYPES, BlastColumns, BlastCommands, DBConfig, DB_EXPECTED_FILES, EXTRACT_MODES, raise_file_not_found_e
from locidex.version import __version__
from locidex.classes.aligner import perform_alignment, aligner
from locidex.utils import check_db_groups, get_format
Expand Down Expand Up @@ -106,7 +106,8 @@ def run_extract(config):
if sample_name == None:
sample_name = os.path.basename(input_fasta)

run_data = SEARCH_RUN_DATA
config = config | db_data.config_data.to_dict() # update config data to use db data
run_data = dict()
run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
run_data['parameters'] = config

Expand Down
16 changes: 9 additions & 7 deletions locidex/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,17 @@ def __init__(self,input,header,is_protein=False,delim="_",trans_table=11,
else:
self.parse_fasta(self.input)

def process_dir(self):
def process_dir(self) -> None:
files = self.get_dir_files(self.input)
for f in files[self.__file_input]:
for e in self.valid_ext:
if e in f[1]:
if f[1].endswith(e):
self.gene_name = f[1].replace(f'{e}','')
self.parse_fasta(f[0])
break
else:
logger.critical("File: {} does not have a valid extension. Valid extensions are: {}".format(e, str(self.valid_ext)))
raise ValueError("Extension for file: {} is not allowed.".format())

def set_input_type(self):
if os.path.isfile(self.input):
Expand Down Expand Up @@ -133,9 +136,9 @@ def parse_fasta(self, input_file):
_open = partial(gzip.open, mode='rt') if encoding == 'gzip' else open
with _open(input_file) as f:
for record in SeqIO.parse(f, 'fasta'):
id = str(record.id)
id_in = str(record.id)
if self.input_type == self.__file_input:
gene_name = "_".join(id.split(self.delim)[:-1])
gene_name = "_".join(id_in.split(self.delim)[:-1])
else:
gene_name = self.gene_name
dna_seq = str(record.seq).lower().replace('-','')
Expand All @@ -153,10 +156,10 @@ def parse_fasta(self, input_file):
row = LocidexDBHeader(
seq_id=self.seq_idx,
locus_name=gene_name,
locus_name_alt=id,
locus_name_alt=id_in,
locus_product='',
locus_description='',
locus_uid=id.split(self.delim)[-1],
locus_uid=id_in.split(self.delim)[-1],
dna_seq=dna_seq,
dna_seq_len=dna_len,
dna_seq_hash=calc_md5([dna_seq])[0],
Expand All @@ -174,7 +177,6 @@ def parse_fasta(self, input_file):
dna_min_ident=self.min_ident_perc,
min_dna_match_cov=self.min_cov_perc
)

self.data[self.seq_idx] = row
self.seq_idx += 1

Expand Down
2 changes: 2 additions & 0 deletions locidex/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def main(argv=None):
if args.command is None:
parser.print_help()
sys.exit()
logger.info("Running {}".format(args.command))
tasks[args.command][module_idx].run(args)
logger.info("Finished: {}".format(args.command))


# call main function
Expand Down
4 changes: 2 additions & 2 deletions locidex/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import errno
from typing import Any
from locidex.classes.seq_intake import seq_intake
from locidex.constants import SEARCH_RUN_DATA, START_CODONS, STOP_CODONS, DBConfig
from locidex.constants import START_CODONS, STOP_CODONS, DBConfig
from locidex.utils import calc_md5
from locidex.version import __version__

Expand Down Expand Up @@ -387,7 +387,7 @@ def run_report(config):
translation_table = config['translation_table']


run_data = SEARCH_RUN_DATA
run_data = dict()
run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
run_data['parameters'] = analysis_parameters

Expand Down
9 changes: 4 additions & 5 deletions locidex/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from locidex.classes.db import search_db_conf, db_config
from locidex.manifest import DBData
from locidex.classes.seq_intake import seq_intake, seq_store, HitFilters
from locidex.constants import BlastCommands, SEARCH_RUN_DATA, FILE_TYPES, BlastColumns, DB_EXPECTED_FILES, OPTION_GROUPS, DBConfig
from locidex.constants import BlastCommands, FILE_TYPES, BlastColumns, DB_EXPECTED_FILES, OPTION_GROUPS, DBConfig
from locidex.utils import write_seq_dict, check_db_groups, slots, get_format
from locidex.version import __version__

Expand Down Expand Up @@ -136,13 +136,12 @@ def run_search(config):
if sample_name == None:
sample_name = query_file.stem


run_data = SEARCH_RUN_DATA
db_data = DBData(db_dir=db_dir)
config = config | db_data.config_data.to_dict()
run_data = dict()
run_data['analysis_start_time'] = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
run_data['parameters'] = config

db_data = DBData(db_dir=db_dir)

if os.path.isdir(outdir) and not force:
logger.critical(f'Error {outdir} exists, if you would like to overwrite, then specify --force')
raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), str(outdir))
Expand Down
Loading