Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

filter: Report min and max date separately #930

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,22 @@ def filter_by_date(metadata, date_column="date", min_date=None, max_date=None):
return filtered


def filter_by_min_date(metadata, min_date, **kwargs):
    """Apply only a lower date bound to the metadata.

    Thin wrapper around ``filter_by_date``: the given ``min_date`` is supplied
    as the ``min_date`` keyword and every other keyword argument (for example
    ``date_column``) is passed through unchanged. This function does not set
    ``max_date`` itself.

    Returns whatever ``filter_by_date`` returns for these arguments.
    """
    # Collect the pass-through options together with the single bound this
    # wrapper is responsible for, then delegate in one call.
    forwarded = dict(kwargs, min_date=min_date)
    return filter_by_date(metadata, **forwarded)


def filter_by_max_date(metadata, max_date, **kwargs):
    """Apply only an upper date bound to the metadata.

    Thin wrapper around ``filter_by_date``: the given ``max_date`` is supplied
    as the ``max_date`` keyword and every other keyword argument (for example
    ``date_column``) is passed through unchanged. This function does not set
    ``min_date`` itself.

    Returns whatever ``filter_by_date`` returns for these arguments.
    """
    # Collect the pass-through options together with the single bound this
    # wrapper is responsible for, then delegate in one call.
    forwarded = dict(kwargs, max_date=max_date)
    return filter_by_date(metadata, **forwarded)


def filter_by_sequence_index(metadata, sequence_index):
"""Filter metadata by presence of corresponding entries in a given sequence
index. This filter effectively intersects the strain ids in the metadata and
Expand Down Expand Up @@ -616,14 +632,21 @@ def construct_filters(args, sequence_index):
}
))

# Filter by date.
if args.min_date or args.max_date:
# Filter by min/max date.
if args.min_date:
exclude_by.append((
filter_by_date,
filter_by_min_date,
{
"date_column": "date",
"min_date": args.min_date,
"date_column": "date",
}
))
if args.max_date:
exclude_by.append((
filter_by_max_date,
{
"max_date": args.max_date,
"date_column": "date",
}
))

Expand Down Expand Up @@ -1659,7 +1682,8 @@ def run(args):
"filter_by_exclude_where": "{count} of these were dropped because of '{exclude_where}'",
"filter_by_query": "{count} of these were filtered out by the query: \"{query}\"",
"filter_by_ambiguous_date": "{count} of these were dropped because of their ambiguous date in {ambiguity}",
"filter_by_date": "{count} of these were dropped because of their date (or lack of date)",
"filter_by_min_date": "{count} of these were dropped because they were earlier than {min_date} or missing a date",
"filter_by_max_date": "{count} of these were dropped because they were later than {max_date} or missing a date",
"filter_by_sequence_length": "{count} of these were dropped because they were shorter than minimum length of {min_length}bp",
"filter_by_non_nucleotide": "{count} of these were dropped because they had non-nucleotide characters",
"skip_group_by_with_ambiguous_year": "{count} were dropped during grouping due to ambiguous year information",
Expand Down
12 changes: 12 additions & 0 deletions tests/functional/filter.t
Original file line number Diff line number Diff line change
Expand Up @@ -499,3 +499,15 @@ Error on missing group-by columns.
$ cat $TMP/metadata-filtered.tsv
cat: .*: No such file or directory (re)
[1]

Check output of min/max date filters.

$ ${AUGUR} filter \
> --metadata filter/metadata.tsv \
> --min-date 2015-01-01 \
> --max-date 2016-02-01 \
> --output-metadata "$TMP/filtered_metadata.tsv"
8 strains were dropped during filtering
\t1 of these were dropped because they were earlier than 2015.0 or missing a date (esc)
\t7 of these were dropped because they were later than 2016.09 or missing a date (esc)
4 strains passed all filters