Skip to content

Commit

Permalink
Merge pull request #930: filter: Report min and max date separately
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin authored May 24, 2022
2 parents 9dda176 + 1cdde92 commit 99c4d05
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 5 deletions.
34 changes: 29 additions & 5 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,22 @@ def filter_by_date(metadata, date_column="date", min_date=None, max_date=None):
return filtered


def filter_by_min_date(metadata, min_date, **kwargs):
    """Filter metadata by a minimum date.

    Thin wrapper around ``filter_by_date``: ``metadata`` is passed through
    positionally, ``min_date`` is supplied by keyword, and any additional
    keyword arguments are forwarded unchanged.

    Returns whatever ``filter_by_date`` returns.
    """
    # Keep ``min_date`` first so keywords are forwarded in the same order as
    # a direct ``filter_by_date(metadata, min_date=..., **kwargs)`` call.
    forwarded = {"min_date": min_date, **kwargs}
    return filter_by_date(metadata, **forwarded)


def filter_by_max_date(metadata, max_date, **kwargs):
    """Filter metadata by a maximum date.

    Thin wrapper around ``filter_by_date``: ``metadata`` is passed through
    positionally, ``max_date`` is supplied by keyword, and any additional
    keyword arguments are forwarded unchanged.

    Returns whatever ``filter_by_date`` returns.
    """
    # Keep ``max_date`` first so keywords are forwarded in the same order as
    # a direct ``filter_by_date(metadata, max_date=..., **kwargs)`` call.
    forwarded = {"max_date": max_date, **kwargs}
    return filter_by_date(metadata, **forwarded)


def filter_by_sequence_index(metadata, sequence_index):
"""Filter metadata by presence of corresponding entries in a given sequence
index. This filter effectively intersects the strain ids in the metadata and
Expand Down Expand Up @@ -616,14 +632,21 @@ def construct_filters(args, sequence_index):
}
))

# Filter by date.
if args.min_date or args.max_date:
# Filter by min/max date.
if args.min_date:
exclude_by.append((
filter_by_date,
filter_by_min_date,
{
"date_column": "date",
"min_date": args.min_date,
"date_column": "date",
}
))
if args.max_date:
exclude_by.append((
filter_by_max_date,
{
"max_date": args.max_date,
"date_column": "date",
}
))

Expand Down Expand Up @@ -1659,7 +1682,8 @@ def run(args):
"filter_by_exclude_where": "{count} of these were dropped because of '{exclude_where}'",
"filter_by_query": "{count} of these were filtered out by the query: \"{query}\"",
"filter_by_ambiguous_date": "{count} of these were dropped because of their ambiguous date in {ambiguity}",
"filter_by_date": "{count} of these were dropped because of their date (or lack of date)",
"filter_by_min_date": "{count} of these were dropped because they were earlier than {min_date} or missing a date",
"filter_by_max_date": "{count} of these were dropped because they were later than {max_date} or missing a date",
"filter_by_sequence_length": "{count} of these were dropped because they were shorter than minimum length of {min_length}bp",
"filter_by_non_nucleotide": "{count} of these were dropped because they had non-nucleotide characters",
"skip_group_by_with_ambiguous_year": "{count} were dropped during grouping due to ambiguous year information",
Expand Down
12 changes: 12 additions & 0 deletions tests/functional/filter.t
Original file line number Diff line number Diff line change
Expand Up @@ -499,3 +499,15 @@ Error on missing group-by columns.
$ cat $TMP/metadata-filtered.tsv
cat: .*: No such file or directory (re)
[1]

Check output of min/max date filters.

$ ${AUGUR} filter \
> --metadata filter/metadata.tsv \
> --min-date 2015-01-01 \
> --max-date 2016-02-01 \
> --output-metadata "$TMP/filtered_metadata.tsv"
8 strains were dropped during filtering
\t1 of these were dropped because they were earlier than 2015.0 or missing a date (esc)
\t7 of these were dropped because they were later than 2016.09 or missing a date (esc)
4 strains passed all filters

0 comments on commit 99c4d05

Please sign in to comment.