Skip to content

Commit 7cbd09b

Browse files
committed
Issues #70 + #71 | remove culling limit, fix minscore, v2.15
1 parent 0d3a5b7 commit 7cbd09b

File tree

1 file changed

+22
-16
lines changed

1 file changed

+22
-16
lines changed

bin/mlst

+22-16
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use Path::Tiny; # bundled in $APPDIR/perl5/
1717
#..............................................................................
1818
# Globals
1919

20-
my $VERSION = "2.14";
20+
my $VERSION = "2.15";
2121
my $EXE = just_filename( $FindBin::RealScript );
2222
my $AUTHOR = 'Torsten Seemann';
2323
my $URL = 'https://github.com/tseemann/mlst';
@@ -60,6 +60,12 @@ my %novel; # keep track of novel alleles
6060
# in case user is forcing a scheme on us (legacy mode)
6161
$scheme && ! exists $scheme{$scheme} and err("Invalid --scheme '$scheme'. Check using --list");
6262

63+
# we set minscore to 0 if --scheme is set
64+
if ($scheme and $minscore != 0) {
65+
msg("Setting --minscore=0 because user chose --scheme");
66+
$minscore=0;
67+
}
68+
6369
if ($list or $longlist) {
6470
if ($list) {
6571
print join( " ", $pubmlst->names ), "\n";
@@ -79,7 +85,8 @@ require_exe(@DEPS);
7985
# build a hash of schemes to ignore from the CSV string
8086
my %exclude = map { ($_ => 1) } (split m/,/ , $exclude);
8187
%exclude = () if $scheme; # don't exclude anything if a specific scheme is provided
82-
msg("Excluding", scalar(keys %exclude), "schemes:", (keys %exclude));
88+
my $num_excluded = scalar(keys %exclude);
89+
msg("Excluding $num_excluded schemes:", (keys %exclude)) if $num_excluded > 0;
8390

8491
#..............................................................................
8592
# Output results
@@ -187,18 +194,20 @@ if ($novel) {
187194

188195
# Say our goodbyes
189196
my @motd = (
190-
"Please also cite 'Jolley & Maiden 2010, BMC Bioinf, 11:595' if you use $EXE",
191-
"You can use --label XXX to replace an ugly filename in the output",
192-
"$EXE also supports --json output for the modern bioinformatician",
197+
"Please also cite 'Jolley & Maiden 2010, BMC Bioinf, 11:595' if you use $EXE.",
198+
"You can use --label XXX to replace an ugly filename in the output.",
199+
"$EXE also supports --json output for the modern bioinformatician.",
193200
"Did you know $EXE also works on .gbk and .gz files?",
194-
"I am quite confident this won't be the last time you run $EXE",
195-
"Somehow this tool escaped my normal Australiana based naming system",
201+
"I am quite confident this won't be the last time you run $EXE.",
202+
"Somehow this tool escaped my normal Australiana based naming system.",
196203
"If you have problems, please file at https://github.com/tseemann/mlst/issues",
197204
"The manual is now quite extensive; read it at https://github.com/tseemann/mlst",
198-
"You can use '$EXE --longlist' to see all the supported schemes",
199-
"Use --quiet to avoid all the message output, including these witticisms",
200-
"Thanks for using $EXE, I hope you found it useful",
205+
"You can use '$EXE --longlist' to see all the supported schemes.",
206+
"Use --quiet or -q to avoid all the message output, including these witticisms.",
207+
"Thanks for using $EXE, I hope you found it useful.",
201208
"If you like MLST, you're going to absolutely love cgMLST!",
209+
"If you like MLST, you're absolutely going to love wgMLST!",
210+
"Remember that --minscore is only used when using automatic scheme detection.",
202211
"You can follow me on Twitter at \@torstenseemann",
203212
);
204213
msg( $motd[ int(rand(scalar(@motd))) ] );
@@ -210,12 +219,9 @@ exit(0);
210219
sub find_mlst {
211220
my($fname) = @_;
212221

213-
# https://github.com/tseemann/mlst/issues/69
214-
my $culling = $scheme ? 10 : 1;
215-
216222
my $cmd = "blastn -query \Q$fname\E -db \Q$blastdb\E -num_threads $threads"
217223
." -ungapped -dust no -word_size 32 -max_target_seqs 10000"
218-
." -perc_identity $minid -evalue 1E-20 -culling_limit $culling"
224+
." -perc_identity $minid -evalue 1E-20"
219225
." -outfmt '6 sseqid slen length nident qseqid qstart qend qseq'";
220226
msg("Running: $cmd") if $debug;
221227

@@ -236,7 +242,7 @@ sub find_mlst {
236242
next unless $nident/$hlen >= $mincov/100 ; # need min-cov to reach minid
237243

238244
if ($scheme and $sch ne $scheme) {
239-
msg("Skipping $sch.$gene.$num allele as user specified --scheme $scheme");
245+
msg("Skipping $sch.$gene.$num allele as user specified --scheme $scheme") if $debug;
240246
next;
241247
}
242248
if ($exclude{$sch}) {
@@ -352,7 +358,7 @@ sub setOptions {
352358
"SCORING",
353359
{OPT=>"minid=f", VAR=>\$minid, DEFAULT=>95, DESC=>"DNA %identity of full allele to consider 'similar' [~]"},
354360
{OPT=>"mincov=f", VAR=>\$mincov, DEFAULT=>10, DESC=>"DNA %cov to report partial allele at all [?]"},
355-
{OPT=>"minscore=f", VAR=>\$minscore, DEFAULT=>50, DESC=>"Minimum score out of 100 to match a scheme"},
361+
{OPT=>"minscore=f", VAR=>\$minscore, DEFAULT=>50, DESC=>"Minimum score out of 100 to match a scheme (when auto --scheme)"},
356362
"PATHS",
357363
{OPT=>"blastdb=s", VAR=>\$blastdb, DEFAULT=>"$FindBin::RealBin/../db/blast/mlst.fa", DESC=>"BLAST database"},
358364
{OPT=>"datadir=s", VAR=>\$datadir, DEFAULT=>"$FindBin::RealBin/../db/pubmlst", DESC=>"PubMLST data"},

0 commit comments

Comments
 (0)