From eb36c696a68c17ea7db8d2adf0924c6233dbd47f Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 17 May 2022 10:05:27 -0700
Subject: [PATCH 1/2] Report min and max date separately

This allows for more detail in the report, and makes it more consistent by having one report line per parameter.
---
 augur/filter.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/augur/filter.py b/augur/filter.py
index cd3b0a243..2633aab4f 100644
--- a/augur/filter.py
+++ b/augur/filter.py
@@ -342,6 +342,22 @@ def filter_by_date(metadata, date_column="date", min_date=None, max_date=None):
     return filtered
 
 
+def filter_by_min_date(metadata, min_date, **kwargs):
+    """Filter metadata by minimum date.
+
+    Calls filter_by_date with min_date set, forwarding any other keyword arguments.
+    """
+    return filter_by_date(metadata, min_date=min_date, **kwargs)
+
+
+def filter_by_max_date(metadata, max_date, **kwargs):
+    """Filter metadata by maximum date.
+
+    Calls filter_by_date with max_date set, forwarding any other keyword arguments.
+    """
+    return filter_by_date(metadata, max_date=max_date, **kwargs)
+
+
 def filter_by_sequence_index(metadata, sequence_index):
     """Filter metadata by presence of corresponding entries in a given sequence
     index. This filter effectively intersects the strain ids in the metadata and
@@ -616,14 +632,21 @@ def construct_filters(args, sequence_index):
             }
         ))
 
-    # Filter by date.
-    if args.min_date or args.max_date:
+    # Filter by min/max date.
+    if args.min_date:
         exclude_by.append((
-            filter_by_date,
+            filter_by_min_date,
             {
-                "date_column": "date",
                 "min_date": args.min_date,
+                "date_column": "date",
+            }
+        ))
+    if args.max_date:
+        exclude_by.append((
+            filter_by_max_date,
+            {
                 "max_date": args.max_date,
+                "date_column": "date",
             }
         ))
 
@@ -1659,7 +1682,8 @@ def run(args):
         "filter_by_exclude_where": "{count} of these were dropped because of '{exclude_where}'",
         "filter_by_query": "{count} of these were filtered out by the query: \"{query}\"",
         "filter_by_ambiguous_date": "{count} of these were dropped because of their ambiguous date in {ambiguity}",
-        "filter_by_date": "{count} of these were dropped because of their date (or lack of date)",
+        "filter_by_min_date": "{count} of these were dropped because they were earlier than {min_date} or missing a date",
+        "filter_by_max_date": "{count} of these were dropped because they were later than {max_date} or missing a date",
         "filter_by_sequence_length": "{count} of these were dropped because they were shorter than minimum length of {min_length}bp",
         "filter_by_non_nucleotide": "{count} of these were dropped because they had non-nucleotide characters",
         "skip_group_by_with_ambiguous_year": "{count} were dropped during grouping due to ambiguous year information",

From 1cdde926f405cf6ac134861440119f045c6d60d4 Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Tue, 17 May 2022 10:06:09 -0700
Subject: [PATCH 2/2] Add cram test for report output of filtering by min/max
 date

---
 tests/functional/filter.t | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/functional/filter.t b/tests/functional/filter.t
index e0fd55997..691dc08f9 100644
--- a/tests/functional/filter.t
+++ b/tests/functional/filter.t
@@ -499,3 +499,15 @@ Error on missing group-by columns.
   $ cat $TMP/metadata-filtered.tsv
   cat: .*: No such file or directory (re)
   [1]
+
+Check output of min/max date filters.
+
+  $ ${AUGUR} filter \
+  >  --metadata filter/metadata.tsv \
+  >  --min-date 2015-01-01 \
+  >  --max-date 2016-02-01 \
+  >  --output-metadata "$TMP/filtered_metadata.tsv"
+  8 strains were dropped during filtering
+  \t1 of these were dropped because they were earlier than 2015.0 or missing a date (esc)
+  \t7 of these were dropped because they were later than 2016.09 or missing a date (esc)
+  4 strains passed all filters